├── .editorconfig
├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── collect-fingerprints-of-songs.py
├── config-development.sample.json
├── config.json
├── db
    └── .keep
├── get-database-stat.py
├── libs
    ├── __init__.py
    ├── config.py
    ├── db.py
    ├── db_mongo.py
    ├── db_sqlite.py
    ├── fingerprint.py
    ├── reader.py
    ├── reader_file.py
    ├── reader_microphone.py
    ├── visualiser.py
    ├── visualiser_console.py
    └── visualiser_plot.py
├── recognize-from-file.py
├── recognize-from-microphone.py
├── requirements.txt
├── reset-database.py
├── sql-execute.py
└── tests
    └── sqlite.py


/.editorconfig:
--------------------------------------------------------------------------------
 1 | root = true
 2 | 
 3 | [*]
 4 | indent_style = space
 5 | indent_size = 2
 6 | insert_final_newline = true
 7 | trim_trailing_whitespace = true
 8 | end_of_line = lf
 9 | charset = utf-8
10 | 
11 | [*.py]
12 | max_line_length = 119
13 | 
14 | [Makefile]
15 | indent_style = tab
16 | insert_final_newline = false
17 | trim_trailing_whitespace = false
18 | 
19 | [*.md]
20 | insert_final_newline = false
21 | trim_trailing_whitespace = false
22 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .DS_Store
 2 | 
 3 | __pycache__/
 4 | *.py[cod]
 5 | *$py.class
 6 | 
 7 | pip-log.txt
 8 | pip-delete-this-directory.txt
 9 | 
10 | *.wav
11 | *.mp3
12 | 
13 | config-development.json
14 | 
15 | db/*.db
16 | db/*.db-journal
17 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 Roman Rodomansky
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: tests
 2 | 
 3 | install:
 4 | 	@echo soon
 5 | 
 6 | clean:
 7 | 	@find . -name \*.pyc -delete
 8 | 
 9 | reset:
10 | 	@python reset-database.py
11 | 
12 | tests:
13 | 	@python tests/*.py
14 | 
15 | stat:
16 | 	@python get-database-stat.py
17 | 
18 | fingerprint-songs: clean
19 | 	@python collect-fingerprints-of-songs.py
20 | 
21 | recognize-listen: clean
22 | 	@python recognize-from-microphone.py -s $(seconds)
23 | 
24 | recognize-file: clean
25 | 	@python recognize-from-file.py


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Fingerprint audio files & identify what's playing
 2 | 
 3 | - conference [PaceMaker: BackEnd-2016 conference](http://www.pacemaker.in.ua/BackEnd-2016/about)
 4 | - slides are on [slideshare.net/rodomansky/ok-shazam-la-lalalaa](http://www.slideshare.net/rodomansky/ok-shazam-la-lalalaa)
 5 | 
 6 | ![](http://new.tinygrab.com/7020c0e8b010392da4053fa90ab8e0c8419bded864.png)
 7 | 
 8 | ## How to set up 
 9 | 
10 | 1. Run `$ make clean reset` to clean up & initialize the database structure
11 | 1. Run `$ make tests` to make sure that everything is properly configured
12 | 1. Copy some `.mp3` audio files into `mp3/` directory
13 | 1. Run `$ make fingerprint-songs` to analyze audio files & fill your db with hashes
14 | 1. Start playing any audio file from the `mp3/` directory (from any source) and, in parallel, run `$ make recognize-listen seconds=5`
15 | 
16 | ![](http://new.tinygrab.com/7020c0e8b0393eec4a18c62170458c029577d378c2.png)
17 | 
18 | ## How to
19 | - To remove a specific song & related hash from db
20 | 
21 |   ```bash
22 |   $ python sql-execute.py -q "DELETE FROM songs WHERE id = 6;"
23 |   $ python sql-execute.py -q "DELETE FROM fingerprints WHERE song_fk = 6;"
24 |   ```
25 | 
26 | ## Thanks to
27 | - [How does Shazam work](http://coding-geek.com/how-shazam-works/)
28 | - [Audio fingerprinting and recognition in Python](https://github.com/worldveil/dejavu) - thanks for the fingerprinting logic via numpy
29 | - [Audio Fingerprinting with Python and Numpy](http://willdrevo.com/fingerprinting-and-audio-recognition-with-python/)
30 | - [Shazam It! Music Recognition Algorithms, Fingerprinting, and Processing](https://www.toptal.com/algorithms/shazam-it-music-processing-fingerprinting-and-recognition)
31 | - [Creating Shazam in Java](http://royvanrijn.com/blog/2010/06/creating-shazam-in-java/)
32 | 


--------------------------------------------------------------------------------
/collect-fingerprints-of-songs.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | import os
 3 | import sys
 4 | import libs
 5 | import libs.fingerprint as fingerprint
 6 | 
 7 | from termcolor import colored
 8 | from libs.reader_file import FileReader
 9 | from libs.db_sqlite import SqliteDatabase
10 | from libs.config import get_config
11 | 
if __name__ == '__main__':
  # Fingerprint every .mp3 file found in mp3/ and store the hashes of the
  # songs that have no fingerprints in the database yet.
  config = get_config()

  db = SqliteDatabase()
  path = "mp3/"

  for filename in os.listdir(path):
    if not filename.endswith(".mp3"):
      continue

    reader = FileReader(os.path.join(path, filename))
    audio = reader.parse_audio()

    # look the song up *before* add_song(), so a freshly inserted song can be
    # told apart from one that was already registered
    song = db.get_song_by_filehash(audio['file_hash'])
    song_id = db.add_song(filename, audio['file_hash'])
    channel_amount = len(audio['channels'])

    msg = ' * %s %s: %s' % (
      colored('id=%s', 'white', attrs=['dark']),       # id
      colored('channels=%d', 'white', attrs=['dark']), # channels
      colored('%s', 'white', attrs=['bold'])           # filename
    )
    print(msg % (song_id, channel_amount, filename))

    if song:
      hash_count = db.get_song_hashes_count(song_id)

      if hash_count > 0:
        msg = '   already exists (%d hashes), skip' % hash_count
        print(colored(msg, 'red'))

        continue

    print(colored('   new song, going to analyze..', 'green'))

    # union of the hashes of all channels; a set drops duplicates across channels
    hashes = set()

    for channeln, channel in enumerate(audio['channels']):
      msg = '   fingerprinting channel %d/%d'
      print(colored(msg, attrs=['dark']) % (channeln + 1, channel_amount))

      channel_hashes = fingerprint.fingerprint(channel, Fs=audio['Fs'], plots=config['fingerprint.show_plots'])
      channel_hashes = set(channel_hashes)

      msg = '   finished channel %d/%d, got %d hashes'
      print(colored(msg, attrs=['dark']) % (
        channeln + 1, channel_amount, len(channel_hashes)
      ))

      hashes |= channel_hashes

    # this summary was built but never printed in the original script
    msg = '   finished fingerprinting, got %d unique hashes'
    print(colored(msg, attrs=['dark']) % len(hashes))

    # rows in the column order expected by store_fingerprints(): song_fk, hash, offset
    values = [(song_id, fingerprint_hash, offset) for fingerprint_hash, offset in hashes]

    msg = '   storing %d hashes in db' % len(values)
    print(colored(msg, 'green'))

    db.store_fingerprints(values)

  print('end')
75 | 


--------------------------------------------------------------------------------
/config-development.sample.json:
--------------------------------------------------------------------------------
1 | {
2 |   "env": "development",
3 | 
4 |   // you can use any of cloud-based or local mongo servers
5 |   // like https://mlab.com/
6 |   "db.dsn": "mongodb://user:password@cloud-domain:port/",
7 |   "db.database": "database-name-here"
8 | }
9 | 


--------------------------------------------------------------------------------
/config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "env": "default",
 3 | 
 4 |   "db.dsn": null,
 5 |   "db.database": null,
 6 |   "db.file": "db/fingerprints2.db",
 7 | 
 8 |   "channels": "2",
 9 |   "mic.visualise_console": true,
10 |   "mic.visualise_plot": false,
11 | 
12 |   "fingerprint.show_plots": false
13 | }
14 | 


--------------------------------------------------------------------------------
/db/.keep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/itspoma/audio-fingerprint-identifying-python/b6a3e1167763dee4006eee2d83600e621d62b270/db/.keep


--------------------------------------------------------------------------------
/get-database-stat.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | from libs.db_sqlite import SqliteDatabase
  3 | from termcolor import colored
  4 | 
  5 | # get summary information
  6 | def printSummary():
  7 |   row = db.executeOne("""
  8 |     SELECT
  9 |       (SELECT COUNT(*) FROM songs) as songs_count,
 10 |       (SELECT COUNT(*) FROM fingerprints) as fingerprints_count
 11 |   """)
 12 | 
 13 |   msg = ' * %s: %s (%s)' % (
 14 |     colored('total', 'yellow'),             # total
 15 |     colored('%d song(s)', 'yellow'),        # songs
 16 |     colored('%d fingerprint(s)', 'yellow')  # fingerprints
 17 |   )
 18 |   print msg % row
 19 | 
 20 |   return row[0] # total
 21 | 
 22 | # get songs \w details
 23 | def printSongs():
 24 |   rows = db.executeAll("""
 25 |     SELECT
 26 |       s.id,
 27 |       s.name,
 28 |       (SELECT count(f.id) FROM fingerprints AS f WHERE f.song_fk = s.id) AS fingerprints_count
 29 |     FROM songs AS s
 30 |     ORDER BY fingerprints_count DESC
 31 |   """)
 32 | 
 33 |   for row in rows:
 34 |     msg = '   ** %s %s: %s' % (
 35 |       colored('id=%s','white',attrs=['dark']), # id
 36 |       colored('%s', 'white', attrs=['bold']),   # name
 37 |       colored('%d hashes', 'green')             # hashes
 38 |     )
 39 |     print msg % row
 40 | 
 41 | # find duplicates
 42 | def printDuplicates():
 43 |   rows = db.executeAll("""
 44 |     SELECT a.song_fk, s.name, SUM(a.cnt)
 45 |     FROM (
 46 |       SELECT song_fk, COUNT(*) cnt
 47 |       FROM fingerprints
 48 |       GROUP BY hash, song_fk, offset
 49 |       HAVING cnt > 1
 50 |       ORDER BY cnt ASC
 51 |     ) a
 52 |     JOIN songs s ON s.id = a.song_fk
 53 |     GROUP BY a.song_fk
 54 |   """)
 55 | 
 56 |   msg = ' * duplications: %s' % colored('%d song(s)', 'yellow')
 57 |   print msg % len(rows)
 58 | 
 59 |   for row in rows:
 60 |     msg = '   ** %s %s: %s' % (
 61 |       colored('id=%s','white',attrs=['dark']),
 62 |       colored('%s', 'white', attrs=['bold']),
 63 |       colored('%d duplicate(s)', 'red')
 64 |     )
 65 |     print msg % row
 66 | 
 67 | # find colissions
 68 | def printColissions():
 69 |   rows = db.executeAll("""
 70 |     SELECT sum(a.n) FROM (
 71 |       SELECT
 72 |         hash,
 73 |         count(distinct song_fk) AS n
 74 |       FROM fingerprints
 75 |       GROUP BY `hash`
 76 |       ORDER BY n DESC
 77 |     ) a
 78 |   """)
 79 | 
 80 |   msg = ' * colissions: %s' % colored('%d hash(es)', 'red')
 81 |   val = 0
 82 |   if rows[0][0] is not None:
 83 |     val = rows[0]
 84 | 
 85 |   print msg % val
 86 | 
 87 | if __name__ == '__main__':
 88 |   db = SqliteDatabase()
 89 |   print ''
 90 | 
 91 |   x = printSummary()
 92 |   printSongs()
 93 |   if x: print ''
 94 | 
 95 |   printDuplicates()
 96 |   if x: print ''
 97 | 
 98 |   printColissions()
 99 | 
100 |   print '\ndone'
101 | 


--------------------------------------------------------------------------------
/libs/__init__.py:
--------------------------------------------------------------------------------
1 | import matplotlib
2 | # matplotlib.use('Agg')
3 | matplotlib.use('TkAgg')
4 | 
def x():
  """Print the literal marker string 'XXX'."""
  marker = 'XXX'
  print(marker)
7 | 


--------------------------------------------------------------------------------
/libs/config.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os.path
 3 | 
 4 | CONFIG_DEFAULT_FILE = 'config.json'
 5 | CONFIG_DEVELOPMENT_FILE = 'config-development.json'
 6 | 
 7 | # load config from multiple files,
 8 | # and return merged result
 9 | def get_config():
10 |   defaultConfig = {"env": "unknown"}
11 | 
12 |   return merge_configs(
13 |     defaultConfig,
14 |     parse_config(CONFIG_DEFAULT_FILE),
15 |     parse_config(CONFIG_DEVELOPMENT_FILE)
16 |   )
17 | 
def parse_config(filename):
  """Parse the JSON config stored in `filename`.

  Returns the decoded JSON value, or an empty dict when `filename` is not an
  existing regular file. Errors while opening the file or decoding the JSON
  are not swallowed, they propagate to the caller.
  """
  if not os.path.isfile(filename):
    return {}

  # `with` closes the file even when json.load() raises
  with open(filename, 'r') as f:
    return json.load(f)
29 | 
def merge_configs(*configs):
  """Merge several dicts into a new one; later arguments override earlier ones."""
  merged = {}

  for layer in configs:
    merged.update(layer)

  return merged
38 | 


--------------------------------------------------------------------------------
/libs/db.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | class Database(object):
 4 |   TABLE_SONGS = None
 5 |   TABLE_FINGERPRINTS = None
 6 | 
 7 |   def __init__(self, a):
 8 |     self.a = a
 9 | 
10 |   def connect(self): pass
11 |   def insert(self, table, params): pass
12 | 
13 |   def get_song_by_filehash(self, filehash):
14 |     return self.findOne(self.TABLE_SONGS, {
15 |       "filehash": filehash
16 |     })
17 | 
18 |   def get_song_by_id(self, id):
19 |     return self.findOne(self.TABLE_SONGS, {
20 |       "id": id
21 |     })
22 | 
23 |   def add_song(self, filename, filehash):
24 |     song = self.get_song_by_filehash(filehash)
25 | 
26 |     if not song:
27 |       song_id = self.insert(self.TABLE_SONGS, {
28 |         "name": filename,
29 |         "filehash": filehash
30 |       })
31 |     else:
32 |       song_id = song[0]
33 | 
34 |     return song_id
35 | 
36 |   def get_song_hashes_count(self, song_id):
37 |     pass
38 | 
39 |   def store_fingerprints(self, values):
40 |     self.insertMany(self.TABLE_FINGERPRINTS,
41 |       ['song_fk', 'hash', 'offset'], values
42 |     )
43 | 


--------------------------------------------------------------------------------
/libs/db_mongo.py:
--------------------------------------------------------------------------------
 1 | from pymongo import MongoClient
 2 | from db import Database
 3 | from config import get_config
 4 | 
 5 | class MongoDatabase(Database):
 6 |   def __init__(self):
 7 |     pass
 8 | 
 9 |   def connect(self):
10 |     config = get_config()
11 | 
12 |     self.client = MongoClient(config['db.dsn'])
13 |     self.db = self.client[config['db.database']]
14 | 
15 |   def insert(self, collection, document):
16 |     # if not self.db:
17 |     self.connect()
18 | 
19 |     return self.db[collection].insert_one(document).inserted_id
20 | 


--------------------------------------------------------------------------------
/libs/db_sqlite.py:
--------------------------------------------------------------------------------
 1 | from db import Database
 2 | from config import get_config
 3 | import sqlite3
 4 | import sys
 5 | from itertools import izip_longest
 6 | from termcolor import colored
 7 | 
 8 | class SqliteDatabase(Database):
 9 |   TABLE_SONGS = 'songs'
10 |   TABLE_FINGERPRINTS = 'fingerprints'
11 | 
12 |   def __init__(self):
13 |     self.connect()
14 | 
15 |   def connect(self):
16 |     config = get_config()
17 | 
18 |     self.conn = sqlite3.connect(config['db.file'])
19 |     self.conn.text_factory = str
20 | 
21 |     self.cur = self.conn.cursor()
22 | 
23 |     print(colored('sqlite - connection opened','white',attrs=['dark']))
24 | 
25 |   def __del__(self):
26 |     self.conn.commit()
27 |     self.conn.close()
28 |     print(colored('sqlite - connection has been closed','white',attrs=['dark']))
29 | 
30 |   def query(self, query, values = []):
31 |     self.cur.execute(query, values)
32 | 
33 |   def executeOne(self, query, values = []):
34 |     self.cur.execute(query, values)
35 |     return self.cur.fetchone()
36 | 
37 |   def executeAll(self, query, values = []):
38 |     self.cur.execute(query, values)
39 |     return self.cur.fetchall()
40 | 
41 |   def buildSelectQuery(self, table, params):
42 |     conditions = []
43 |     values = []
44 | 
45 |     for k, v in enumerate(params):
46 |       key = v
47 |       value = params[v]
48 |       conditions.append("%s = ?" % key)
49 |       values.append(value)
50 | 
51 |     conditions = ' AND '.join(conditions)
52 |     query = "SELECT * FROM %s WHERE %s" % (table, conditions)
53 | 
54 |     return {
55 |       "query": query,
56 |       "values": values
57 |     }
58 | 
59 |   def findOne(self, table, params):
60 |     select = self.buildSelectQuery(table, params)
61 |     return self.executeOne(select['query'], select['values'])
62 | 
63 |   def findAll(self, table, params):
64 |     select = self.buildSelectQuery(table, params)
65 |     return self.executeAll(select['query'], select['values'])
66 | 
67 |   def insert(self, table, params):
68 |     keys = ', '.join(params.keys())
69 |     values = params.values()
70 | 
71 |     query = "INSERT INTO songs (%s) VALUES (?, ?)" % (keys);
72 | 
73 |     self.cur.execute(query, values)
74 |     self.conn.commit()
75 | 
76 |     return self.cur.lastrowid
77 | 
78 |   def insertMany(self, table, columns, values):
79 |     def grouper(iterable, n, fillvalue=None):
80 |       args = [iter(iterable)] * n
81 |       return (filter(None, values) for values
82 |           in izip_longest(fillvalue=fillvalue, *args))
83 | 
84 |     for split_values in grouper(values, 1000):
85 |       query = "INSERT OR IGNORE INTO %s (%s) VALUES (?, ?, ?)" % (table, ", ".join(columns))
86 |       self.cur.executemany(query, split_values)
87 | 
88 |     self.conn.commit()
89 | 
90 |   def get_song_hashes_count(self, song_id):
91 |     query = 'SELECT count(*) FROM %s WHERE song_fk = %d' % (self.TABLE_FINGERPRINTS, song_id)
92 |     rows = self.executeOne(query)
93 |     return int(rows[0])
94 | 


--------------------------------------------------------------------------------
/libs/fingerprint.py:
--------------------------------------------------------------------------------
  1 | import hashlib
  2 | import numpy as np
  3 | import matplotlib.mlab as mlab
  4 | import matplotlib.pyplot as plt
  5 | 
  6 | from termcolor import colored
  7 | from scipy.ndimage.filters import maximum_filter
  8 | from scipy.ndimage.morphology import (generate_binary_structure, iterate_structure, binary_erosion)
  9 | from operator import itemgetter
 10 | 
 11 | IDX_FREQ_I = 0
 12 | IDX_TIME_J = 1
 13 | 
 14 | # Sampling rate, related to the Nyquist conditions, which affects
 15 | # the range frequencies we can detect.
 16 | DEFAULT_FS = 44100
 17 | 
 18 | # Size of the FFT window, affects frequency granularity
 19 | DEFAULT_WINDOW_SIZE = 4096
 20 | 
 21 | # Ratio by which each sequential window overlaps the last and the
 22 | # next window. Higher overlap will allow a higher granularity of offset
 23 | # matching, but potentially more fingerprints.
 24 | DEFAULT_OVERLAP_RATIO = 0.5
 25 | 
 26 | # Degree to which a fingerprint can be paired with its neighbors --
 27 | # higher will cause more fingerprints, but potentially better accuracy.
 28 | DEFAULT_FAN_VALUE = 15
 29 | 
 30 | # Minimum amplitude in spectrogram in order to be considered a peak.
 31 | # This can be raised to reduce number of fingerprints, but can negatively
 32 | # affect accuracy.
 33 | DEFAULT_AMP_MIN = 10
 34 | 
 35 | # Number of cells around an amplitude peak in the spectrogram in order
 36 | # for Dejavu to consider it a spectral peak. Higher values mean less
 37 | # fingerprints and faster matching, but can potentially affect accuracy.
 38 | PEAK_NEIGHBORHOOD_SIZE = 20
 39 | 
 40 | # Thresholds on how close or far fingerprints can be in time in order
 41 | # to be paired as a fingerprint. If your max is too low, higher values of
 42 | # DEFAULT_FAN_VALUE may not perform as expected.
 43 | MIN_HASH_TIME_DELTA = 0
 44 | MAX_HASH_TIME_DELTA = 200
 45 | 
 46 | # If True, will sort peaks temporally for fingerprinting;
 47 | # not sorting will cut down number of fingerprints, but potentially
 48 | # affect performance.
 49 | PEAK_SORT = True
 50 | 
 51 | # Number of leading hex characters of the SHA1 digest that are kept for
 52 | # each fingerprint. Keeping fewer characters needs less storage, but means
 53 | # potentially higher collisions and misclassifications when identifying songs.
 54 | FINGERPRINT_REDUCTION = 20
 55 | 
 56 | def fingerprint(channel_samples, Fs=DEFAULT_FS,
 57 |                 wsize=DEFAULT_WINDOW_SIZE,
 58 |                 wratio=DEFAULT_OVERLAP_RATIO,
 59 |                 fan_value=DEFAULT_FAN_VALUE,
 60 |                 amp_min=DEFAULT_AMP_MIN,
 61 |                 plots=False):
 62 | 
 63 |     # show samples plot
 64 |     if plots:
 65 |       plt.plot(channel_samples)
 66 |       plt.title('%d samples' % len(channel_samples))
 67 |       plt.xlabel('time (s)')
 68 |       plt.ylabel('amplitude (A)')
 69 |       plt.show()
 70 |       plt.gca().invert_yaxis()
 71 | 
 72 |     # FFT the channel, log transform output, find local maxima, then return
 73 |     # locally sensitive hashes.
 74 |     # FFT the signal and extract frequency components
 75 | 
 76 |     # plot the angle spectrum of segments within the signal in a colormap
 77 |     arr2D = mlab.specgram(
 78 |         channel_samples,
 79 |         NFFT=wsize,
 80 |         Fs=Fs,
 81 |         window=mlab.window_hanning,
 82 |         noverlap=int(wsize * wratio))[0]
 83 | 
 84 |     # show spectrogram plot
 85 |     if plots:
 86 |       plt.plot(arr2D)
 87 |       plt.title('FFT')
 88 |       plt.show()
 89 | 
 90 |     # apply log transform since specgram() returns linear array
 91 |     arr2D = 10 * np.log10(arr2D) # calculates the base 10 logarithm for all elements of arr2D
 92 |     arr2D[arr2D == -np.inf] = 0  # replace infs with zeros
 93 | 
 94 |     # find local maxima
 95 |     local_maxima = get_2D_peaks(arr2D, plot=plots, amp_min=amp_min)
 96 | 
 97 |     msg = '   local_maxima: %d of frequency & time pairs'
 98 |     print colored(msg, attrs=['dark']) % len(local_maxima)
 99 | 
100 |     # return hashes
101 |     return generate_hashes(local_maxima, fan_value=fan_value)
102 | 
def get_2D_peaks(arr2D, plot=False, amp_min=DEFAULT_AMP_MIN):
    """Find the local maxima of `arr2D` whose value is above `amp_min`.

    arr2D   -- 2-D array; rows are used as the frequency index and columns as
               the time index
    plot    -- when true, show the array with the selected peaks on top
    amp_min -- peaks with a value <= amp_min are dropped

    Returns a list of (frequency index, time index) tuples.
    """
    # http://docs.scipy.org/doc/scipy/reference/generated/scipy.ndimage.morphology.iterate_structure.html#scipy.ndimage.morphology.iterate_structure
    struct = generate_binary_structure(2, 1)
    neighborhood = iterate_structure(struct, PEAK_NEIGHBORHOOD_SIZE)

    # a cell is a local maximum when it equals the maximum of its neighborhood
    local_max = maximum_filter(arr2D, footprint=neighborhood) == arr2D

    # cells that are exactly 0 count as background; regions consisting only of
    # background also satisfy the test above and are removed again by the XOR
    background = (arr2D == 0)
    eroded_background = binary_erosion(background, structure=neighborhood,
                                       border_value=1)

    # Boolean mask of arr2D with True at peaks
    detected_peaks = local_max ^ eroded_background

    # np.where() and boolean indexing both walk the mask in row-major order,
    # so the three arrays line up element by element
    freq_rows, time_cols = np.where(detected_peaks)
    amps = arr2D[detected_peaks]

    # keep only the peaks that are loud enough
    keep = amps > amp_min
    frequency_idx = freq_rows[keep]
    time_idx = time_cols[keep]

    # scatter of the peaks
    if plot:
        fig, ax = plt.subplots()
        ax.imshow(arr2D)
        ax.scatter(time_idx, frequency_idx)
        ax.set_xlabel('Time')
        ax.set_ylabel('Frequency')
        ax.set_title("Spectrogram")
        plt.gca().invert_yaxis()
        plt.show()

    # a real list: callers take len() of the result and sort it
    return list(zip(frequency_idx, time_idx))
142 | 
def generate_hashes(peaks, fan_value=DEFAULT_FAN_VALUE):
    """Yield (hash, time offset) tuples for pairs of neighbouring peaks.

    peaks     -- iterable of (frequency index, time index) tuples
    fan_value -- every peak is paired with up to `fan_value - 1` of the peaks
                 that follow it

    A pair is used when the time difference of its peaks lies within
    [MIN_HASH_TIME_DELTA, MAX_HASH_TIME_DELTA]. The hash is made of the first
    FINGERPRINT_REDUCTION hex characters of the SHA1 of
    "freq1|freq2|time delta"; the offset is the time index of the first peak.
    Example item: ('e05b341a9b77a51fd26f', 32)
    """
    # work on a private list: any iterable is accepted and the caller's
    # sequence is not reordered
    if PEAK_SORT:
        peaks = sorted(peaks, key=itemgetter(IDX_TIME_J))
    else:
        peaks = list(peaks)

    peak_count = len(peaks)

    for i in range(peak_count):
        # frequency & time offset of the current peak
        freq1 = peaks[i][IDX_FREQ_I]
        t1 = peaks[i][IDX_TIME_J]

        for j in range(1, fan_value):
            if i + j >= peak_count:
                # ran past the last peak, larger j cannot match either
                break

            # frequency & time offset of the paired peak
            freq2 = peaks[i + j][IDX_FREQ_I]
            t2 = peaks[i + j][IDX_TIME_J]

            t_delta = t2 - t1

            if MIN_HASH_TIME_DELTA <= t_delta <= MAX_HASH_TIME_DELTA:
                key = "%s|%s|%s" % (str(freq1), str(freq2), str(t_delta))
                # sha1() works on bytes
                h = hashlib.sha1(key.encode('utf-8'))
                yield (h.hexdigest()[0:FINGERPRINT_REDUCTION], t1)
169 | 


--------------------------------------------------------------------------------
/libs/reader.py:
--------------------------------------------------------------------------------
class BaseReader(object):
  """Common base of the audio readers; it only stores the constructor argument."""

  def __init__(self, a):
    self.a = a

  def recognize(self):
    """Does nothing in the base class."""
    return None
7 | 


--------------------------------------------------------------------------------
/libs/reader_file.py:
--------------------------------------------------------------------------------
 1 | from reader import BaseReader
 2 | import os
 3 | from pydub import AudioSegment
 4 | from pydub.utils import audioop
 5 | import numpy as np
 6 | from hashlib import sha1
 7 | 
 8 | class FileReader(BaseReader):
 9 |   def __init__(self, filename):
10 |     # super(FileReader, self).__init__(a)
11 |     self.filename = filename
12 | 
13 |   """
14 |   Reads any file supported by pydub (ffmpeg) and returns the data contained
15 |   within. If file reading fails due to input being a 24-bit wav file,
16 |   wavio is used as a backup.
17 | 
18 |   Can be optionally limited to a certain amount of seconds from the start
19 |   of the file by specifying the `limit` parameter. This is the amount of
20 |   seconds from the start of the file.
21 | 
22 |   returns: (channels, samplerate)
23 |   """
24 |   # pydub does not support 24-bit wav files, use wavio when this occurs
25 |   def parse_audio(self):
26 |     limit = None
27 |     # limit = 10
28 | 
29 |     songname, extension = os.path.splitext(os.path.basename(self.filename))
30 | 
31 |     try:
32 |       audiofile = AudioSegment.from_file(self.filename)
33 | 
34 |       if limit:
35 |         audiofile = audiofile[:limit * 1000]
36 | 
37 |       data = np.fromstring(audiofile._data, np.int16)
38 | 
39 |       channels = []
40 |       for chn in xrange(audiofile.channels):
41 |         channels.append(data[chn::audiofile.channels])
42 | 
43 |       fs = audiofile.frame_rate
44 |     except audioop.error:
45 |       print('audioop.error')
46 |       pass
47 |         # fs, _, audiofile = wavio.readwav(filename)
48 | 
49 |         # if limit:
50 |         #     audiofile = audiofile[:limit * 1000]
51 | 
52 |         # audiofile = audiofile.T
53 |         # audiofile = audiofile.astype(np.int16)
54 | 
55 |         # channels = []
56 |         # for chn in audiofile:
57 |         #     channels.append(chn)
58 | 
59 |     return {
60 |       "songname": songname,
61 |       "extension": extension,
62 |       "channels": channels,
63 |       "Fs": audiofile.frame_rate,
64 |       "file_hash": self.parse_file_hash()
65 |     }
66 | 
67 |   def parse_file_hash(self, blocksize=2**20):
68 |     """ Small function to generate a hash to uniquely generate
69 |     a file. Inspired by MD5 version here:
70 |     http://stackoverflow.com/a/1131255/712997
71 | 
72 |     Works with large files.
73 |     """
74 |     s = sha1()
75 | 
76 |     with open(self.filename , "rb") as f:
77 |       while True:
78 |         buf = f.read(blocksize)
79 |         if not buf: break
80 |         s.update(buf)
81 | 
82 |     return s.hexdigest().upper()
83 | 


--------------------------------------------------------------------------------
/libs/reader_microphone.py:
--------------------------------------------------------------------------------
 1 | import pyaudio
 2 | import numpy
 3 | import wave
 4 | from reader import BaseReader
 5 | 
 6 | class MicrophoneReader(BaseReader):
 7 |   default_chunksize = 8192
 8 |   default_format = pyaudio.paInt16
 9 |   default_channels = 2
10 |   default_rate = 44100
11 |   default_seconds = 0
12 | 
13 |   # set default
14 |   def __init__(self, a):
15 |     super(MicrophoneReader, self).__init__(a)
16 |     self.audio = pyaudio.PyAudio()
17 |     self.stream = None
18 |     self.data = []
19 |     self.channels = MicrophoneReader.default_channels
20 |     self.chunksize = MicrophoneReader.default_chunksize
21 |     self.rate = MicrophoneReader.default_rate
22 |     self.recorded = False
23 | 
24 |   def start_recording(self, channels=default_channels,
25 |                       rate=default_rate,
26 |                       chunksize=default_chunksize,
27 |                       seconds=default_seconds):
28 |     self.chunksize = chunksize
29 |     self.channels = channels
30 |     self.recorded = False
31 |     self.rate = rate
32 | 
33 |     if self.stream:
34 |       self.stream.stop_stream()
35 |       self.stream.close()
36 | 
37 |     self.stream = self.audio.open(
38 |       format=self.default_format,
39 |       channels=channels,
40 |       rate=rate,
41 |       input=True,
42 |       frames_per_buffer=chunksize,
43 |     )
44 | 
45 |     self.data = [[] for i in range(channels)]
46 | 
47 |   def process_recording(self):
48 |     data = self.stream.read(self.chunksize)
49 | 
50 |     # http://docs.scipy.org/doc/numpy/reference/generated/numpy.fromstring.html
51 |     # A new 1-D array initialized from raw binary or text data in a string.
52 |     nums = numpy.fromstring(data, numpy.int16)
53 | 
54 |     for c in range(self.channels):
55 |       self.data[c].extend(nums[c::self.channels])
56 |       # self.data[c].append(data)
57 | 
58 |     return nums
59 | 
60 |   def stop_recording(self):
61 |     self.stream.stop_stream()
62 |     self.stream.close()
63 |     self.stream = None
64 |     self.recorded = True
65 | 
66 |   def get_recorded_data(self):
67 |     return self.data
68 | 
69 |   def save_recorded(self, output_filename):
70 |     wf = wave.open(output_filename, 'wb')
71 |     wf.setnchannels(self.channels)
72 |     wf.setsampwidth(self.audio.get_sample_size(self.default_format))
73 |     wf.setframerate(self.rate)
74 | 
75 |     # values = ','.join(str(v) for v in self.data[1])
76 |     # numpydata = numpy.hstack(self.data[1])
77 | 
78 |     chunk_length = len(self.data[0]) / self.channels
79 |     result = numpy.reshape(self.data[0], (chunk_length, self.channels))
80 |     # wf.writeframes(b''.join(numpydata))
81 |     wf.writeframes(result)
82 |     wf.close()
83 | 
84 |   def play(self):
85 |     pass
86 | 
87 |   def get_recorded_time(self):
88 |     return len(self.data[0]) / self.rate
89 | 


--------------------------------------------------------------------------------
/libs/visualiser.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/itspoma/audio-fingerprint-identifying-python/b6a3e1167763dee4006eee2d83600e621d62b270/libs/visualiser.py


--------------------------------------------------------------------------------
/libs/visualiser_console.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | class VisualiserConsole():
 4 |   def __init__(self):
 5 |     pass
 6 | 
 7 |   @staticmethod
 8 |   def calc(data):
 9 |     peak = np.average(np.abs(data)) * 2
10 |     bars = "#" * int(200 * peak / 2**16)
11 |     return (peak, bars)
12 | 


--------------------------------------------------------------------------------
/libs/visualiser_plot.py:
--------------------------------------------------------------------------------
 1 | from matplotlib import pyplot
 2 | 
 3 | class VisualiserPlot():
 4 |   def __init__(self):
 5 |     pass
 6 | 
 7 |   @staticmethod
 8 |   def show(data):
 9 |     pyplot.plot(data)
10 |     pyplot.show()
11 | 


--------------------------------------------------------------------------------
/recognize-from-file.py:
--------------------------------------------------------------------------------
 1 | from libs.reader_microphone import FileReader
 2 | 
 3 | song = None
 4 | seconds = 5
 5 | 
 6 | r = FileReader(123)
 7 | r.recognize(seconds=seconds)
 8 | 
 9 | print(song)
10 | 


--------------------------------------------------------------------------------
/recognize-from-microphone.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | import os
  3 | import sys
  4 | import libs
  5 | import libs.fingerprint as fingerprint
  6 | import argparse
  7 | 
  8 | from argparse import RawTextHelpFormatter
  9 | from itertools import izip_longest
 10 | from termcolor import colored
 11 | from libs.config import get_config
 12 | from libs.reader_microphone import MicrophoneReader
 13 | from libs.visualiser_console import VisualiserConsole as visual_peak
 14 | from libs.visualiser_plot import VisualiserPlot as visual_plot
 15 | from libs.db_sqlite import SqliteDatabase
 16 | # from libs.db_mongo import MongoDatabase
 17 | 
 18 | if __name__ == '__main__':
 19 |   config = get_config()
 20 | 
 21 |   db = SqliteDatabase()
 22 | 
 23 |   parser = argparse.ArgumentParser(formatter_class=RawTextHelpFormatter)
 24 |   parser.add_argument('-s', '--seconds', nargs='?')
 25 |   args = parser.parse_args()
 26 | 
 27 |   if not args.seconds:
 28 |     parser.print_help()
 29 |     sys.exit(0)
 30 | 
 31 |   seconds = int(args.seconds)
 32 | 
 33 |   chunksize = 2**12  # 4096
 34 |   channels = 2#int(config['channels']) # 1=mono, 2=stereo
 35 | 
 36 |   record_forever = False
 37 |   visualise_console = bool(config['mic.visualise_console'])
 38 |   visualise_plot = bool(config['mic.visualise_plot'])
 39 | 
 40 |   reader = MicrophoneReader(None)
 41 | 
 42 |   reader.start_recording(seconds=seconds,
 43 |     chunksize=chunksize,
 44 |     channels=channels)
 45 | 
 46 |   msg = ' * started recording..'
 47 |   print colored(msg, attrs=['dark'])
 48 | 
 49 |   while True:
 50 |     bufferSize = int(reader.rate / reader.chunksize * seconds)
 51 | 
 52 |     for i in range(0, bufferSize):
 53 |       nums = reader.process_recording()
 54 | 
 55 |       if visualise_console:
 56 |         msg = colored('   %05d', attrs=['dark']) + colored(' %s', 'green')
 57 |         print msg  % visual_peak.calc(nums)
 58 |       else:
 59 |         msg = '   processing %d of %d..' % (i, bufferSize)
 60 |         print colored(msg, attrs=['dark'])
 61 | 
 62 |     if not record_forever: break
 63 | 
 64 |   if visualise_plot:
 65 |     data = reader.get_recorded_data()[0]
 66 |     visual_plot.show(data)
 67 | 
 68 |   reader.stop_recording()
 69 | 
 70 |   msg = ' * recording has been stopped'
 71 |   print colored(msg, attrs=['dark'])
 72 | 
 73 | 
 74 | 
 75 |   def grouper(iterable, n, fillvalue=None):
 76 |     args = [iter(iterable)] * n
 77 |     return (filter(None, values) for values
 78 |             in izip_longest(fillvalue=fillvalue, *args))
 79 | 
 80 |   data = reader.get_recorded_data()
 81 | 
 82 |   msg = ' * recorded %d samples'
 83 |   print colored(msg, attrs=['dark']) % len(data[0])
 84 | 
 85 |   # reader.save_recorded('test.wav')
 86 | 
 87 | 
 88 |   Fs = fingerprint.DEFAULT_FS
 89 |   channel_amount = len(data)
 90 | 
 91 |   result = set()
 92 |   matches = []
 93 | 
 94 |   def find_matches(samples, Fs=fingerprint.DEFAULT_FS):
 95 |     hashes = fingerprint.fingerprint(samples, Fs=Fs)
 96 |     return return_matches(hashes)
 97 | 
 98 |   def return_matches(hashes):
 99 |     mapper = {}
100 |     for hash, offset in hashes:
101 |       mapper[hash.upper()] = offset
102 |     values = mapper.keys()
103 | 
104 |     for split_values in grouper(values, 1000):
105 |       # @todo move to db related files
106 |       query = """
107 |         SELECT upper(hash), song_fk, offset
108 |         FROM fingerprints
109 |         WHERE upper(hash) IN (%s)
110 |       """
111 |       query = query % ', '.join('?' * len(split_values))
112 | 
113 |       x = db.executeAll(query, split_values)
114 |       matches_found = len(x)
115 | 
116 |       if matches_found > 0:
117 |         msg = '   ** found %d hash matches (step %d/%d)'
118 |         print colored(msg, 'green') % (
119 |           matches_found,
120 |           len(split_values),
121 |           len(values)
122 |         )
123 |       else:
124 |         msg = '   ** not matches found (step %d/%d)'
125 |         print colored(msg, 'red') % (
126 |           len(split_values),
127 |           len(values)
128 |         )
129 | 
130 |       for hash, sid, offset in x:
131 |         # (sid, db_offset - song_sampled_offset)
132 |         yield (sid, offset - mapper[hash])
133 | 
134 |   for channeln, channel in enumerate(data):
135 |     # TODO: Remove prints or change them into optional logging.
136 |     msg = '   fingerprinting channel %d/%d'
137 |     print colored(msg, attrs=['dark']) % (channeln+1, channel_amount)
138 | 
139 |     matches.extend(find_matches(channel))
140 | 
141 |     msg = '   finished channel %d/%d, got %d hashes'
142 |     print colored(msg, attrs=['dark']) % (
143 |       channeln+1, channel_amount, len(matches)
144 |     )
145 | 
146 |   def align_matches(matches):
147 |     diff_counter = {}
148 |     largest = 0
149 |     largest_count = 0
150 |     song_id = -1
151 | 
152 |     for tup in matches:
153 |       sid, diff = tup
154 | 
155 |       if diff not in diff_counter:
156 |         diff_counter[diff] = {}
157 | 
158 |       if sid not in diff_counter[diff]:
159 |         diff_counter[diff][sid] = 0
160 | 
161 |       diff_counter[diff][sid] += 1
162 | 
163 |       if diff_counter[diff][sid] > largest_count:
164 |         largest = diff
165 |         largest_count = diff_counter[diff][sid]
166 |         song_id = sid
167 | 
168 |     songM = db.get_song_by_id(song_id)
169 | 
170 |     nseconds = round(float(largest) / fingerprint.DEFAULT_FS *
171 |                      fingerprint.DEFAULT_WINDOW_SIZE *
172 |                      fingerprint.DEFAULT_OVERLAP_RATIO, 5)
173 | 
174 |     return {
175 |         "SONG_ID" : song_id,
176 |         "SONG_NAME" : songM[1],
177 |         "CONFIDENCE" : largest_count,
178 |         "OFFSET" : int(largest),
179 |         "OFFSET_SECS" : nseconds
180 |     }
181 | 
182 |   total_matches_found = len(matches)
183 | 
184 |   print ''
185 | 
186 |   if total_matches_found > 0:
187 |     msg = ' ** totally found %d hash matches'
188 |     print colored(msg, 'green') % total_matches_found
189 | 
190 |     song = align_matches(matches)
191 | 
192 |     msg = ' => song: %s (id=%d)\n'
193 |     msg += '    offset: %d (%d secs)\n'
194 |     msg += '    confidence: %d'
195 | 
196 |     print colored(msg, 'green') % (
197 |       song['SONG_NAME'], song['SONG_ID'],
198 |       song['OFFSET'], song['OFFSET_SECS'],
199 |       song['CONFIDENCE']
200 |     )
201 |   else:
202 |     msg = ' ** not matches found at all'
203 |     print colored(msg, 'red')
204 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1
2 | termcolor
3 | pyaudio
4 | wave
5 | pydub
6 | 


--------------------------------------------------------------------------------
/reset-database.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | from libs.db_sqlite import SqliteDatabase
 3 | 
 4 | if __name__ == '__main__':
 5 |   db = SqliteDatabase()
 6 | 
 7 |   #
 8 |   # songs table
 9 | 
10 |   db.query("DROP TABLE IF EXISTS songs;")
11 |   print('removed db.songs');
12 | 
13 |   db.query("""
14 |     CREATE TABLE songs (
15 |       id  INTEGER PRIMARY KEY AUTOINCREMENT,
16 |       name  TEXT,
17 |       filehash  TEXT
18 |     );
19 |   """)
20 |   print('created db.songs');
21 | 
22 |   #
23 |   # fingerprints table
24 | 
25 |   db.query("DROP TABLE IF EXISTS fingerprints;")
26 |   print('removed db.fingerprints');
27 | 
28 |   db.query("""
29 |     CREATE TABLE `fingerprints` (
30 |       `id`  INTEGER PRIMARY KEY AUTOINCREMENT,
31 |       `song_fk` INTEGER,
32 |       `hash`  TEXT,
33 |       `offset`  INTEGER
34 |     );
35 |   """)
36 |   print('created db.fingerprints');
37 | 
38 |   print('done');
39 | 


--------------------------------------------------------------------------------
/sql-execute.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | import argparse
 3 | import sys
 4 | 
 5 | from libs.db_sqlite import SqliteDatabase
 6 | from termcolor import colored
 7 | from argparse import RawTextHelpFormatter
 8 | 
 9 | if __name__ == '__main__':
10 |   parser = argparse.ArgumentParser(formatter_class=RawTextHelpFormatter)
11 |   parser.add_argument('-q', '--query', nargs='?')
12 |   args = parser.parse_args()
13 | 
14 |   if not args.query:
15 |     parser.print_help()
16 |     sys.exit(0)
17 | 
18 |   db = SqliteDatabase()
19 | 
20 |   row = db.executeOne(args.query)
21 | 
22 |   print row
23 | 


--------------------------------------------------------------------------------
/tests/sqlite.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | import sys, os
 3 | sys.path.append(os.path.join(sys.path[0], '..'))
 4 | 
 5 | from libs.db_sqlite import SqliteDatabase
 6 | from termcolor import colored
 7 | 
 8 | if __name__ == '__main__':
 9 |   db = SqliteDatabase()
10 | 
11 |   row = db.executeOne("SELECT 2+3 as x;")
12 | 
13 |   assert row[0] == 5, "failed simple sql execution"
14 |   print ' * %s' % colored('ok', 'green')
15 | 


--------------------------------------------------------------------------------