├── .gitignore ├── FOBI.py ├── FastICA.py ├── README.md ├── image_FastICA_FOBI.py ├── images ├── after │ ├── fastica │ │ ├── dos.jpg │ │ ├── tres.jpg │ │ └── unos.jpg │ └── fobi │ │ ├── dos.jpg │ │ ├── tres.jpg │ │ └── unos.jpg ├── bnw │ ├── baboon.jpg │ ├── lena.jpg │ └── peppers.jpg ├── mixed │ ├── dos.jpg │ ├── tres.jpg │ └── unos.jpg ├── mixing_image.py ├── original │ ├── baboon.jpg │ ├── lena.jpg │ └── peppers.jpg ├── preprocess_image.py └── utilities.py ├── papers ├── FOBI.pdf └── FastICA.pdf ├── plots ├── images │ ├── blackNwhite.jpg │ ├── fobi.jpg │ ├── fobi.png │ ├── ica.jpg │ ├── ica_g2.jpg │ ├── ica_g3.jpg │ ├── ica_g3.png │ ├── ica_g4.jpg │ ├── mixed.jpg │ ├── original.jpg │ └── white_tranform.jpg └── sounds │ ├── Ring_StarWars_mixed.jpg │ ├── Ring_StarWars_original.jpg │ ├── Ring_StarWars_separated.jpg │ └── Ring_StarWars_separated_FOBI.jpg ├── sound_FastICA_FOBI.py ├── sounds ├── FOBIseparateX.wav ├── FOBIseparateY.wav ├── mixedX.wav ├── mixedY.wav ├── mixing_sound.py ├── preprocess_sound.py ├── preprocess_sound.pyc ├── separateX.wav ├── separateY.wav ├── sourceX.wav ├── sourceY.wav └── utilities.py └── utilities.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | ./papers/* 3 | ./papers -------------------------------------------------------------------------------- /FOBI.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def FOBI(X): 4 | """Fourth Order Blind Identification technique is used. 5 | The function returns the unmixing matrix. 6 | X is assumed to be centered and whitened. 7 | The paper by J. Cardaso is in itself the best resource out there for it. 8 | SOURCE SEPARATION USING HIGHER ORDER MOMENTS - Jean-Francois Cardoso""" 9 | 10 | rows = X.shape[0] 11 | n = X.shape[1] 12 | # Initializing the weighted covariance matrix which will hold the fourth order information 13 | weightedCovMatrix = np.zeros([rows, rows]) 14 | 15 | # Approximating the expectation by diving with the number of data points 16 | for signal in X.T: 17 | norm = np.linalg.norm(signal) 18 | weightedCovMatrix += norm*norm*np.outer(signal, signal) 19 | 20 | weightedCovMatrix /= n 21 | 22 | # Doing the eigen value decomposition 23 | eigValue, eigVector = np.linalg.eigh(weightedCovMatrix) 24 | 25 | # print eigVector 26 | return eigVector 27 | -------------------------------------------------------------------------------- /FastICA.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | np.random.seed(7) 3 | 4 | def g1(u): 5 | return np.tanh(u) 6 | 7 | def g1_dash(u): 8 | d = g1(u) 9 | return 1 - d*d 10 | 11 | def g2(u): 12 | return u*np.exp(-(u*u)/2) 13 | 14 | def g2_dash(u): 15 | return (1 - u*u)*np.exp(-(u*u)/2) 16 | 17 | def g3(u): 18 | return 1/(1 + np.exp(-u)) 19 | 20 | def g3_dash(u): 21 | d = g3(u) 22 | return d*(1 - d) 23 | 24 | def g4(u): 25 | return u*u*u 26 | 27 | def g4_dash(u): 28 | return 3*u*u 29 | 30 | def FastICA(X, vectors, eps): 31 | """FastICA technique is used. 32 | The function returns one independent component. 33 | X is assumed to be centered and whitened. 34 | The paper by A. Hyvarinen and E. Oja is in itself the best resource out there for it. 35 | Independent Component Analysis:Algorithms and Applications - A. Hyvarinen and E. Oja 36 | """ 37 | # The size of w1 is determined by the number of images 38 | size = X.shape[0] 39 | n = X.shape[1] 40 | # Initial weight vector 41 | w1 = np.random.rand(size) 42 | w2 = np.random.rand(size) 43 | # Making the vector of unit norm 44 | w1 = w1/np.linalg.norm(w1) 45 | w2 = w2/np.linalg.norm(w2) 46 | 47 | while( np.abs(np.dot(w1.T,w2)) < (1 - eps)): 48 | w1 = w2 49 | # first is E{xg(W.T*x)} term 50 | first = np.dot(X, g3(np.dot(w2.T, X)))/n 51 | # second is E{g_dash(W.T*x)}*W term 52 | second = np.mean(g3_dash(np.dot(w2.T, X)))*w2 53 | # Update step 54 | w2 = first - second 55 | # Using Gram-Schmidt deflation to decorelate the vectors 56 | w3 = w2 57 | for vector in vectors: 58 | w3 = w3 - np.dot(w2.T, vector)*vector 59 | w2 = w3 60 | w2 = w2/np.linalg.norm(w2) 61 | 62 | return w1 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Cocktail Party Problem 2 | 3 | Cocktail Party Problem or the Blind Source Separation is a classical problem. 4 | The motivation for this problem is, imagine yourself in a party with a lot of people. 5 | There will be a sort of cacaphony becasue of all the people taling at the same time. 6 | Now you can shut out the voices in the background to hear some specific conversation. 7 | We also want to do the same and let's formalize what we are trying to do. 8 | Let's say there are m sources which are producing signal according to some distribution 9 | independent of each other and we have n microphones which record the signals arriving at them. 10 | We try to decipher the underlying source signals from the mixed signals arriving at the microphones. 11 | We will try and constraint the problem a little more so that we can move forward. 12 | The first assumption is that the mixed signals are a **linear combination** of the source signals. 13 | The second assumption is that the **number of source and number of microphones are equal.** 14 | 15 | ## Using ICA to solve the problem for images as well as sound signals 16 | 17 | The same algorithm will be tried on images, to try and separate the mixed images and get the original images back. 18 | To try the code: 19 | - fork the repository and place the mixed images in `images/mixed/` folder or for sound signals `sounds/` folder. 20 | - Run the `preprocess_sound.py` in `sounds/` to get the same sampling rate for each sound signal.(You may have to change the filenames in the code) 21 | - Run `image_FastICA_FOBI.py` or `image_FastICA_FOBI.py`. 22 | - And that's it you are done! The output will be in the same directory for the sounds and `images/after/` folder for images. 23 | 24 | ### FastICA and FOBI applied to images 25 | 26 | The FastICA algorithm uses a fixed-point iteration scheme to try and find the independent componenets. 27 | It is very sensitive to the functions used for approximating the negentropy. 28 | The math behind the algorithm takes some time to understand but intuitively they are trying to find the vectors which 29 | maximizes the non-gaussanity of the signals. 30 | To understand the algorithm fully I would encourage you to dive straight into the paper itself 31 | [Independent Component Analysis:Algorithms and Applications](http://www.sciencedirect.com/science/article/pii/S0893608000000265) - *A. Hyvärinen and E. Oja* 32 | 33 | FOBI is a one shot algorithm which tries to solve the same problem with matrix factorization and finding the eigenvectors 34 | of a quadratically weighted covariance matrix of the data. The eigenvectors form the mixing matrix and is orthogonal 35 | which is to be expected. 36 | The proof and math behind the algorithm is neatly explained in the original paper 37 | [SOURCE SEPARATION USING HIGHER ORDER MOMENTS](http://ieeexplore.ieee.org/document/266878/) - *Jean-Francois Cardoso* 38 | 39 | The original colored images 40 | ![Original](./plots/images/original.jpg) 41 | 42 | The black and white images which were used 43 | ![blackNwhite](./plots/images/blackNwhite.jpg) 44 | 45 | The images after they were mixed linearly 46 | ![mixed](./plots/images/mixed.jpg) 47 | 48 | The results of running the FOBI algorithm 49 | ![fobi](./plots/images/fobi.jpg) 50 | 51 | The result of running the FastICA algorithm 52 | ![ica](./plots/images/ica.jpg) 53 | 54 | ### FastICA and FOBI applied to sounds 55 | 56 | The above 2 algorithms work straight out of the box for sounds as well. 57 | 58 | The original sounds 59 | ![Original](./plots/sounds/Ring_StarWars_original.jpg) 60 | 61 | The mixed sounds 62 | ![Mixed](./plots/sounds/Ring_StarWars_mixed.jpg) 63 | 64 | Seperated sounds using FOBI 65 | ![fobi](./plots/sounds/Ring_StarWars_separated_FOBI.jpg) 66 | 67 | Seperated sounds using FastICA 68 | ![Mixed](./plots/sounds/Ring_StarWars_separated.jpg) 69 | 70 | To listen to the separated sounds, head over to `sounds/` folder. 71 | The sounds are labelled so that there is no confusion. 72 | 73 | Enjoy! 74 | -------------------------------------------------------------------------------- /image_FastICA_FOBI.py: -------------------------------------------------------------------------------- 1 | import utilities as utl 2 | from FastICA import FastICA 3 | from FOBI import FOBI 4 | import numpy as np 5 | np.random.seed(7) 6 | 7 | eps = 0.00000001 8 | 9 | # Read the images from ./images/mixed 10 | names = ["unos", "dos", "tres"] 11 | images = utl.listImages(names, "mixed") 12 | 13 | # The images are mean centered 14 | centImages = [] 15 | for image in images: 16 | rescaleImage = image 17 | centImage = rescaleImage - np.mean(rescaleImage) 18 | centImages.append(centImage) 19 | 20 | # The images are whitened, the helper function is in utilities.py 21 | whiteImages = utl.whitenMatrix(utl.list2matrix(centImages)) 22 | 23 | # Uncomment the lines below to plot the images after whitening 24 | # utl.plotImages(utl.matrix2list(whiteImages), names, "../white_tranform", True, False) 25 | 26 | # The images are now converted into time series data 27 | # X is a 3*image_size matrix, with each row representing a image 28 | X = whiteImages 29 | 30 | # Find the individual components one by one 31 | vectors = [] 32 | for i in range(0, len(images)): 33 | vector = FastICA(X, vectors, eps) 34 | # print vector 35 | vectors.append(vector) 36 | 37 | # Stack the vectors to form the unmixing matrix 38 | W = np.vstack(vectors) 39 | 40 | # Get the original matrix 41 | S = np.dot(W, whiteImages) 42 | 43 | # Get the unmixed images 44 | uimages = utl.matrix2list(S) 45 | 46 | # Unmixing matrix through FOBI 47 | fobiW = FOBI(X) 48 | 49 | # Get the original matrix using fobiW 50 | fobiS = np.dot(fobiW.T, whiteImages) 51 | 52 | # Get the unmixed images through FOBI 53 | fobi_uimages = utl.matrix2list(fobiS) 54 | 55 | # Plot the unmixed images for FastICA and save them as well 56 | utl.plotImages(uimages, names, "ica_g3", True, True) 57 | utl.saveImages(uimages, names, "after/fastica") 58 | 59 | # Plot the unmixed images for FOBI and save them as well 60 | utl.plotImages(fobi_uimages, names, "fobi", True, True) 61 | utl.saveImages(fobi_uimages, names, "after/fobi") 62 | -------------------------------------------------------------------------------- /images/after/fastica/dos.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/images/after/fastica/dos.jpg -------------------------------------------------------------------------------- /images/after/fastica/tres.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/images/after/fastica/tres.jpg -------------------------------------------------------------------------------- /images/after/fastica/unos.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/images/after/fastica/unos.jpg -------------------------------------------------------------------------------- /images/after/fobi/dos.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/images/after/fobi/dos.jpg -------------------------------------------------------------------------------- /images/after/fobi/tres.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/images/after/fobi/tres.jpg -------------------------------------------------------------------------------- /images/after/fobi/unos.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/images/after/fobi/unos.jpg -------------------------------------------------------------------------------- /images/bnw/baboon.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/images/bnw/baboon.jpg -------------------------------------------------------------------------------- /images/bnw/lena.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/images/bnw/lena.jpg -------------------------------------------------------------------------------- /images/bnw/peppers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/images/bnw/peppers.jpg -------------------------------------------------------------------------------- /images/mixed/dos.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/images/mixed/dos.jpg -------------------------------------------------------------------------------- /images/mixed/tres.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/images/mixed/tres.jpg -------------------------------------------------------------------------------- /images/mixed/unos.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/images/mixed/unos.jpg -------------------------------------------------------------------------------- /images/mixing_image.py: -------------------------------------------------------------------------------- 1 | import utilities as utl 2 | 3 | # Read the images stored in ./images/bnw 4 | names = ["baboon", "lena", "peppers"] 5 | images = utl.listImages(names, "bnw", True) 6 | 7 | # Getting mixed images 8 | image1 = utl.mixImages(images, [0.23, 0.14, 0.35]) 9 | image2 = utl.mixImages(images, [0.32, 0.05, 0.17]) 10 | image3 = utl.mixImages(images, [0.07, 0.31, 0.25]) 11 | 12 | # Plot the mixed images 13 | mnames = ["unos", "dos", "tres"] 14 | mimages = [image1, image2, image3] 15 | utl.plotImages(mimages, mnames, "../plots/images/mixed", True, False) 16 | 17 | # Save the mixed images in ./images/mixed 18 | # Uncomment the below line to save the images 19 | # utl.saveImages(mimages, mnames, "mixed") 20 | 21 | # Plot the histogram of mixed images 22 | utl.showHistogram(mimages, mnames, "../plots/images/mixed_histogram", False) 23 | 24 | -------------------------------------------------------------------------------- /images/original/baboon.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/images/original/baboon.jpg -------------------------------------------------------------------------------- /images/original/lena.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/images/original/lena.jpg -------------------------------------------------------------------------------- /images/original/peppers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/images/original/peppers.jpg -------------------------------------------------------------------------------- /images/preprocess_image.py: -------------------------------------------------------------------------------- 1 | import utilities as utl 2 | 3 | # Read the images stored in ./images/original 4 | names = ["baboon", "lena", "peppers"] 5 | images = utl.listImages(names, "original", False) 6 | 7 | # Plot the orginal images which are colored 8 | utl.plotImages(images, names, "../plots/images/original", False) 9 | 10 | # Plot the images after they are resized and converted to black and white 11 | images = utl.listImages(names, "original", True) 12 | utl.plotImages(images, names, "../plots/images/blackNwhite", True, False) 13 | 14 | # Save the black and white images in ./images/bnw 15 | # Uncomment the below line to save the images 16 | # utl.saveImages(images, names, "bnw") 17 | 18 | # Plot the histogram of each image 19 | utl.showHistogram(images, names, "../plots/images/bnw_histogram", False) -------------------------------------------------------------------------------- /images/utilities.py: -------------------------------------------------------------------------------- 1 | import skimage 2 | import numpy as np 3 | from skimage import io 4 | from skimage.transform import resize 5 | import matplotlib.pyplot as plt 6 | import matplotlib.image as mpimg 7 | 8 | def listImages(name_list, path, as_grey=True): 9 | """Gives a list of 200*200 Gray-Scale images whose names are specified by name_list""" 10 | image_list = [] 11 | 12 | for name in name_list: 13 | image = io.imread(path + "/" + name + ".jpg", as_grey=as_grey) 14 | if as_grey is True: 15 | image = resize(image, (200, 200)) 16 | image_list.append(image) 17 | 18 | return image_list 19 | 20 | def saveImages(image_list, name_list, path): 21 | """Saves the list of images in the folder specified by path""" 22 | i = 0 23 | for image in image_list: 24 | name = name_list[i] 25 | io.imsave(path + "/" + name + ".jpg", image) 26 | i += 1 27 | 28 | def mixImages(image_list, weights): 29 | """ Returns a image mixed in proportion with the ratios given by weights.""" 30 | size = image_list[0].shape 31 | mixImage = np.zeros(size) 32 | i = 0 33 | for image in image_list: 34 | mixImage += image*weights[i] 35 | i += 1 36 | 37 | return mixImage 38 | 39 | def list2matrix(image_list): 40 | """Converts the image into a vector and 41 | stacks the vectors to form a matirx of size (no of images)*(width*height)""" 42 | flatten_list = [] 43 | for image in image_list: 44 | flatten_list.append(image.ravel()) 45 | 46 | matrix = np.vstack(flatten_list) 47 | 48 | return matrix 49 | 50 | def matrix2list(matrix): 51 | """Converts the matrix into a list of images. 52 | Considering each row of the matrix to be a image""" 53 | image_list = [] 54 | for row in matrix: 55 | image = np.reshape(row, (200, 200)) 56 | image_list.append(image) 57 | 58 | return image_list 59 | 60 | def showHistogram(image_list, name_list, path, toSave=False, hist_range=(0.0, 1.0)): 61 | """Shows the histogram of images specified by image_list 62 | and sets the range of hist() using hist_range""" 63 | fig = plt.figure() 64 | fig.subplots_adjust(hspace=.5) 65 | image_coordinate = 321 66 | i = 0 67 | for image in image_list: 68 | fig.add_subplot(image_coordinate) 69 | plt.title(name_list[i]) 70 | plt.set_cmap('gray') 71 | plt.axis('off') 72 | plt.imshow(image) 73 | 74 | image_coordinate += 1 75 | 76 | fig.add_subplot(image_coordinate) 77 | plt.title('histogram') 78 | plt.hist(image.ravel(), bins=256, range=hist_range) 79 | 80 | image_coordinate += 1 81 | i += 1 82 | 83 | if toSave: 84 | plt.savefig(path + ".jpg") 85 | plt.show() 86 | 87 | def plotImages(image_list, name_list, path, as_grey, toSave=False): 88 | """Plots the images given in image_list side by side.""" 89 | 90 | fig = plt.figure() 91 | imageCoordinate = 100 + 10*len(image_list) + 1 92 | i = 0 93 | 94 | for image in image_list: 95 | fig.add_subplot(imageCoordinate) 96 | plt.title(name_list[i]) 97 | plt.axis('off') 98 | plt.imshow(image) 99 | if as_grey: 100 | plt.set_cmap('gray') 101 | 102 | imageCoordinate += 1 103 | i += 1 104 | 105 | if toSave: 106 | plt.savefig(path + ".jpg",bbox_inches='tight') 107 | plt.show() -------------------------------------------------------------------------------- /papers/FOBI.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/papers/FOBI.pdf -------------------------------------------------------------------------------- /papers/FastICA.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/papers/FastICA.pdf -------------------------------------------------------------------------------- /plots/images/blackNwhite.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/plots/images/blackNwhite.jpg -------------------------------------------------------------------------------- /plots/images/fobi.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/plots/images/fobi.jpg -------------------------------------------------------------------------------- /plots/images/fobi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/plots/images/fobi.png -------------------------------------------------------------------------------- /plots/images/ica.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/plots/images/ica.jpg -------------------------------------------------------------------------------- /plots/images/ica_g2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/plots/images/ica_g2.jpg -------------------------------------------------------------------------------- /plots/images/ica_g3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/plots/images/ica_g3.jpg -------------------------------------------------------------------------------- /plots/images/ica_g3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/plots/images/ica_g3.png -------------------------------------------------------------------------------- /plots/images/ica_g4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/plots/images/ica_g4.jpg -------------------------------------------------------------------------------- /plots/images/mixed.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/plots/images/mixed.jpg -------------------------------------------------------------------------------- /plots/images/original.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/plots/images/original.jpg -------------------------------------------------------------------------------- /plots/images/white_tranform.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/plots/images/white_tranform.jpg -------------------------------------------------------------------------------- /plots/sounds/Ring_StarWars_mixed.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/plots/sounds/Ring_StarWars_mixed.jpg -------------------------------------------------------------------------------- /plots/sounds/Ring_StarWars_original.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/plots/sounds/Ring_StarWars_original.jpg -------------------------------------------------------------------------------- /plots/sounds/Ring_StarWars_separated.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/plots/sounds/Ring_StarWars_separated.jpg -------------------------------------------------------------------------------- /plots/sounds/Ring_StarWars_separated_FOBI.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/plots/sounds/Ring_StarWars_separated_FOBI.jpg -------------------------------------------------------------------------------- /sound_FastICA_FOBI.py: -------------------------------------------------------------------------------- 1 | from scipy.io import wavfile 2 | from FastICA import FastICA 3 | from FOBI import FOBI 4 | import utilities as utl 5 | import numpy as np 6 | 7 | # Specify the name 8 | name = ["X", "Y"] 9 | 10 | #specifing epsilon(upper bound to the error) 11 | eps = 0.00000001 12 | 13 | # Read the mixed signals 14 | rate1, data1 = wavfile.read('./sounds/mixed' + name[0] + '.wav') 15 | rate2, data2 = wavfile.read('./sounds/mixed' + name[1] + '.wav') 16 | 17 | # Centering the mixed signals and scaling the values as well 18 | data1 = data1 - np.mean(data1) 19 | data1 = data1/32768 20 | data2 = data2 - np.mean(data2) 21 | data2 = data2/32768 22 | 23 | # Creating a matrix out of the signals 24 | signals = [data1, data2] 25 | matrix = np.vstack(signals) 26 | 27 | # Whitening the matrix as a pre-processing step 28 | whiteMatrix = utl.whitenMatrix(matrix) 29 | 30 | X = whiteMatrix 31 | 32 | # Find the individual components one by one 33 | vectors = [] 34 | for i in range(0, X.shape[0]): 35 | # The FastICA function is used as is from FastICA_image.py, and the it works out of the box 36 | vector = FastICA(X, vectors, eps) 37 | vectors.append(vector) 38 | 39 | # Stack the vectors to form the unmixing matrix 40 | W = np.vstack(vectors) 41 | 42 | # Get the original matrix 43 | S = np.dot(W, whiteMatrix) 44 | 45 | # Unmixing matrix through FOBI 46 | fobiW = FOBI(X) 47 | 48 | # Get the original matrix using fobiW 49 | fobiS = np.dot(fobiW.T, whiteMatrix) 50 | 51 | # Plot the separated sound signals 52 | utl.plotSounds([S[0], S[1]], ["1", "2"], rate1, "Ring_StarWars_separated") 53 | 54 | # Write the separated sound signals, 5000 is multiplied so that signal is audible 55 | wavfile.write("./sounds/FOBIseparate" + name[0] + ".wav", rate1, 5000*S[0].astype(np.int16)) 56 | wavfile.write("./sounds/FOBIseparate" + name[1] + ".wav", rate1, 5000*S[1].astype(np.int16)) 57 | 58 | # Plot the separated sound signals 59 | utl.plotSounds([fobiS[1], fobiS[0]], ["1", "2"], rate1, "Ring_StarWars_separated_FOBI") 60 | 61 | # Write the separated sound signals, 5000 is multiplied so that signal is audible 62 | wavfile.write("./sounds/FOBIseparate" + name[0] + ".wav", rate1, 5000*fobiS[1].astype(np.int16)) 63 | wavfile.write("./sounds/FOBIseparate" + name[1] + ".wav", rate1, 5000*fobiS[0].astype(np.int16)) 64 | -------------------------------------------------------------------------------- /sounds/FOBIseparateX.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/sounds/FOBIseparateX.wav -------------------------------------------------------------------------------- /sounds/FOBIseparateY.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/sounds/FOBIseparateY.wav -------------------------------------------------------------------------------- /sounds/mixedX.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/sounds/mixedX.wav -------------------------------------------------------------------------------- /sounds/mixedY.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/sounds/mixedY.wav -------------------------------------------------------------------------------- /sounds/mixing_sound.py: -------------------------------------------------------------------------------- 1 | import utilities as utl 2 | from scipy.io import wavfile 3 | import numpy as np 4 | 5 | # Read the files as numpy array 6 | rate1, data1 = wavfile.read("sourceX.wav") 7 | rate2, data2 = wavfile.read("sourceY.wav") 8 | 9 | # Using the mixSounds helper function from utilities.py 10 | mixedX = utl.mixSounds([data1, data2], [0.3, 0.7]).astype(np.int16) 11 | mixedY = utl.mixSounds([data1, data2], [0.6, 0.4]).astype(np.int16) 12 | 13 | # Plot the mixed sound sources 14 | utl.plotSounds([mixedX, mixedY], ["mixedX","mixedY"], rate1, "../plots/sounds/Ring_StarWars_mixed", False) 15 | 16 | # Save the mixed sources as wav files 17 | wavfile.write("mixedX.wav", rate1, mixedX) 18 | wavfile.write("mixedY.wav", rate1, mixedY) -------------------------------------------------------------------------------- /sounds/preprocess_sound.py: -------------------------------------------------------------------------------- 1 | """The script makes the sources to have same length, 2 | as well as have the same sampling rate""" 3 | from scipy.io import wavfile 4 | import utilities as utl 5 | 6 | # Read the .wav files as numpy arrays 7 | rate1, data1 = wavfile.read("sourceX.wav") 8 | rate2, data2 = wavfile.read("sourceY.wav") 9 | 10 | # Plot the sounds as time series data 11 | utl.plotSounds([data1, data2], ["PhoneRing", "StarWars"], rate1, "../plots/sounds/Ring_StarWars_original") 12 | 13 | # Make both of the files to have same length as well as same sampling rate 14 | minimum = min(data1.shape[0], data2.shape[0]) 15 | 16 | # Slicing the array for both the sources 17 | data1 = data1[0:minimum] 18 | data2 = data2[0:minimum] 19 | 20 | # writing the array into to the wav file with sampling rate which is average of the two 21 | wavfile.write("sourceX.wav", (rate1 + rate2)/2, data1) 22 | wavfile.write("sourceY.wav", (rate1 + rate2)/2, data2) -------------------------------------------------------------------------------- /sounds/preprocess_sound.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/sounds/preprocess_sound.pyc -------------------------------------------------------------------------------- /sounds/separateX.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/sounds/separateX.wav -------------------------------------------------------------------------------- /sounds/separateY.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/sounds/separateY.wav -------------------------------------------------------------------------------- /sounds/sourceX.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/sounds/sourceX.wav -------------------------------------------------------------------------------- /sounds/sourceY.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vishwajeet97/Cocktail-Party-Problem/c2c53af3605811035cc5f76735af99bffeef9506/sounds/sourceY.wav -------------------------------------------------------------------------------- /sounds/utilities.py: -------------------------------------------------------------------------------- 1 | import skimage 2 | import numpy as np 3 | from skimage import io 4 | from skimage.transform import resize 5 | import matplotlib.pyplot as plt 6 | import matplotlib.image as mpimg 7 | 8 | def mixSounds(sound_list, weights): 9 | """ Return a sound array mixed in proportion with the ratios given by weights""" 10 | mixSound = np.zeros(len(sound_list[0])) 11 | i = 0 12 | for sound in sound_list: 13 | mixSound += sound*weights[i] 14 | i += 1 15 | 16 | return mixSound 17 | 18 | def plotSounds(sound_list, name_list, samplerate, path, toSave=False): 19 | """Plots the sounds as a time series data""" 20 | 21 | times = np.arange(len(sound_list[0]))/float(samplerate) 22 | 23 | fig = plt.figure(figsize=(15,4)) 24 | imageCoordinate = 100 + 10*len(sound_list) + 1 25 | i = 0 26 | 27 | for sound in sound_list: 28 | fig.add_subplot(imageCoordinate) 29 | plt.fill_between(times, sound, color='k') 30 | plt.xlim(times[0], times[-1]) 31 | plt.title(name_list[i]) 32 | plt.xlabel('time (s)') 33 | plt.ylabel('amplitude') 34 | # plt.axis("off") 35 | plt.plot(sound) 36 | 37 | imageCoordinate += 1 38 | i += 1 39 | 40 | if toSave: 41 | plt.savefig(path + ".jpg", bbox_inches='tight') 42 | plt.show() -------------------------------------------------------------------------------- /utilities.py: -------------------------------------------------------------------------------- 1 | import skimage 2 | import numpy as np 3 | from skimage import io 4 | from skimage.transform import resize 5 | import matplotlib.pyplot as plt 6 | import matplotlib.image as mpimg 7 | 8 | def listImages(name_list, path, as_grey=True): 9 | """Gives a list of 200*200 Gray-Scale images whose names are specified by name_list""" 10 | image_list = [] 11 | 12 | for name in name_list: 13 | image = io.imread("./images/" + path + "/" + name + ".jpg", as_grey=as_grey) 14 | if as_grey is True: 15 | image = resize(image, (200, 200)) 16 | image_list.append(image) 17 | 18 | return image_list 19 | 20 | def saveImages(image_list, name_list, path): 21 | """Saves the list of images in the folder specified by path""" 22 | i = 0 23 | for image in image_list: 24 | name = name_list[i] 25 | io.imsave("./images/" + path + "/" + name + ".jpg", image) 26 | i += 1 27 | 28 | def list2matrix(image_list): 29 | """Converts the image into a vector and 30 | stacks the vectors to form a matirx of size (no of images)*(width*height)""" 31 | flatten_list = [] 32 | for image in image_list: 33 | flatten_list.append(image.ravel()) 34 | 35 | matrix = np.vstack(flatten_list) 36 | 37 | return matrix 38 | 39 | def matrix2list(matrix): 40 | """Converts the matrix into a list of images. 41 | Considering each row of the matrix to be a image""" 42 | image_list = [] 43 | for row in matrix: 44 | image = np.reshape(row, (200, 200)) 45 | image_list.append(image) 46 | 47 | return image_list 48 | 49 | def whitenMatrix(matrix): 50 | """Whitening tranformation is applied to the images given as a matrix""" 51 | """The transformation for the matrix X is given by E*D^(-1/2)*transpose(E)*X""" 52 | """Where D is a diagonal matrix containing eigen values of covariance matrix of X""" 53 | """E is the matrix containing eigen vectors of covariance matrix of X""" 54 | # Covariance matrix is approximated by this 55 | covMatrix = np.dot(matrix, matrix.T)/matrix.shape[1] 56 | 57 | # Doing the eigen decomposition of cavariance matrix of X 58 | eigenValue, eigenVector = np.linalg.eigh(covMatrix) 59 | # Making a diagonal matrix out of the array eigenValue 60 | diagMatrix = np.diag(eigenValue) 61 | # Computing D^(-1/2) 62 | invSqrRoot = np.sqrt(np.linalg.pinv(diagMatrix)) 63 | # Final matrix which is used for transformation 64 | whitenTrans = np.dot(eigenVector,np.dot(invSqrRoot, eigenVector.T)) 65 | # whiteMatrix is the matrix we want after all the required transformation 66 | # To verify, compute the covvariance matrix, it will be approximately identity 67 | whiteMatrix = np.dot(whitenTrans, matrix) 68 | 69 | # print np.dot(whiteMatrix, whiteMatrix.T)/matrix.shape[1] 70 | 71 | return whiteMatrix 72 | 73 | 74 | def showHistogram(image_list, name_list, path, toSave=False, hist_range=(0.0, 1.0)): 75 | """Shows the histogram of images specified by image_list 76 | and sets the range of hist() using hist_range""" 77 | fig = plt.figure() 78 | fig.subplots_adjust(hspace=.5) 79 | image_coordinate = 321 80 | i = 0 81 | for image in image_list: 82 | fig.add_subplot(image_coordinate) 83 | plt.title(name_list[i]) 84 | plt.set_cmap('gray') 85 | plt.axis('off') 86 | plt.imshow(image) 87 | 88 | image_coordinate += 1 89 | 90 | fig.add_subplot(image_coordinate) 91 | plt.title('histogram') 92 | plt.hist(image.ravel(), bins=256, range=hist_range) 93 | 94 | image_coordinate += 1 95 | i += 1 96 | 97 | if toSave: 98 | plt.savefig("./plots/images/" + path + ".jpg") 99 | plt.show() 100 | 101 | def plotImages(image_list, name_list, path, as_grey, toSave=False): 102 | """Plots the images given in image_list side by side.""" 103 | 104 | fig = plt.figure() 105 | imageCoordinate = 100 + 10*len(image_list) + 1 106 | i = 0 107 | 108 | for image in image_list: 109 | fig.add_subplot(imageCoordinate) 110 | plt.title(name_list[i]) 111 | plt.axis('off') 112 | plt.imshow(image) 113 | if as_grey: 114 | plt.set_cmap('gray') 115 | 116 | imageCoordinate += 1 117 | i += 1 118 | 119 | if toSave: 120 | plt.savefig("./plots/images/" + path + ".png",bbox_inches='tight') 121 | plt.show() 122 | 123 | def plotSounds(sound_list, name_list, samplerate, path, toSave=False): 124 | """Plots the sounds as a time series data""" 125 | 126 | times = np.arange(len(sound_list[0]))/float(samplerate) 127 | 128 | fig = plt.figure(figsize=(15,4)) 129 | imageCoordinate = 100 + 10*len(sound_list) + 1 130 | i = 0 131 | 132 | for sound in sound_list: 133 | fig.add_subplot(imageCoordinate) 134 | plt.fill_between(times, sound, color='k') 135 | plt.xlim(times[0], times[-1]) 136 | plt.title(name_list[i]) 137 | plt.xlabel('time (s)') 138 | plt.ylabel('amplitude') 139 | # plt.axis("off") 140 | plt.plot(sound) 141 | 142 | imageCoordinate += 1 143 | i += 1 144 | 145 | if toSave: 146 | plt.savefig("./plots/sounds/" + path + ".png", bbox_inches='tight') 147 | plt.show() --------------------------------------------------------------------------------