├── output.wav
├── ShuffleNetV2_Synthetic.pth
├── UDP_pxie_connector.py
├── Modified_ShufflenetV2.py
├── Main_SFANC_Window.ipynb
├── Loading_real_wave_noise_2D.py
├── Acquired_sound.py
├── Control_filter_selection.py
└── README.md

/output.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Luo-Zhengding/SFANC-Window/HEAD/output.wav

--------------------------------------------------------------------------------
/ShuffleNetV2_Synthetic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Luo-Zhengding/SFANC-Window/HEAD/ShuffleNetV2_Synthetic.pth

--------------------------------------------------------------------------------
/UDP_pxie_connector.py:
--------------------------------------------------------------------------------
import socket


class UDP_sender():

    def __init__(self, IpAddress, Port):
        self.ipaddress = IpAddress
        self.port = Port
        self.serverAddressPort = (IpAddress, Port)
        # UDP (datagram) socket used to send control-filter IDs to the PXIe controller
        self.UDPClientSocket = socket.socket(family=socket.AF_INET, type=socket.SOCK_DGRAM)

    def send_message(self, text):
        bytesToSend = str.encode(text)
        self.UDPClientSocket.sendto(bytesToSend, self.serverAddressPort)

--------------------------------------------------------------------------------
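
A minimal usage sketch for `UDP_sender` (not part of the repository). The IP address and port are the values used in `Main_SFANC_Window.ipynb`; in a real setup they must match the PXIe controller:

    from UDP_pxie_connector import UDP_sender

    # Assumed address/port of the PXIe real-time controller (see Main_SFANC_Window.ipynb)
    sender = UDP_sender(IpAddress="192.168.1.103", Port=61557)
    sender.send_message(text=str(3))  # transmit control-filter index 3 as one UDP datagram
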
/Modified_ShufflenetV2.py:
--------------------------------------------------------------------------------
import torchvision.models as models
import torch.nn as nn


class Modified_ShufflenetV2(nn.Module):

    def __init__(self, num_classes):
        super().__init__()

        # Map the single-channel spectrogram to 3 channels so it matches the
        # RGB input expected by the ImageNet-pretrained backbone
        self.bw2col = nn.Sequential(
            nn.BatchNorm2d(1),
            nn.Conv2d(1, 10, 1, padding=0), nn.ReLU(),
            nn.Conv2d(10, 3, 1, padding=0), nn.ReLU())

        self.mv2 = models.shufflenet_v2_x0_5(pretrained=True)  # shufflenet_v2_x0_5 pre-trained on ImageNet

        # Change the output channels of the last convolution stage from 1024 to 512
        self.mv2.conv5 = nn.Sequential(
            nn.Conv2d(192, 512, 1, 1, bias=False),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),)

        self.mv2.fc = nn.Linear(512, num_classes)  # replace the fully connected layer

    def forward(self, x):
        x = self.bw2col(x)
        x = self.mv2(x)
        return x

--------------------------------------------------------------------------------
/Main_SFANC_Window.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "10d793cb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Refer to the paper 'Real-time implementation and explainable AI analysis of delayless CNN-based selective fixed-filter active noise control'\n",
    "# Before running the code, you need a computer microphone and a real-time controller.\n",
    "\n",
    "from Acquired_sound import AudioRecorder\n",
    "from UDP_pxie_connector import UDP_sender\n",
    "\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")\n",
    "\n",
    "\n",
    "ID1 = 0 # initial control-filter ID\n",
    "# Pass the ID to the PXIe (real-time controller)\n",
    "Ipaddress = \"192.168.1.103\" # !!! PXIe IP\n",
    "Port = 61557 # !!! PXIe port\n",
    "UDP_connection = UDP_sender(Ipaddress, Port)\n",
    "UDP_connection.send_message(text=str(ID1))\n",
    "print(\"The ID of the initial control filter is:\", ID1)\n",
    "\n",
    "\n",
    "while True:\n",
    "    recorder = AudioRecorder(seconds=1)\n",
    "    ID = recorder.record(\"output.wav\")\n",
    "\n",
    "    if ID != ID1: # If the filter index changes, pass it to the PXIe\n",
    "        ID1 = ID\n",
    "        print(\"The ID of the selected control filter is:\", ID1)\n",
    "        UDP_connection.send_message(text=str(ID1))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}

--------------------------------------------------------------------------------
/Loading_real_wave_noise_2D.py:
--------------------------------------------------------------------------------
import os
import torch
import torchaudio
import torchaudio.transforms as T
import librosa


def minmaxscaler(data):
    # Normalize the waveform by its dynamic range
    d_min = data.min()
    d_max = data.max()
    return data / (d_max - d_min)


def resample_wav(waveform, sample_rate, resample_rate):
    resampler = T.Resample(sample_rate, resample_rate, dtype=waveform.dtype)
    resampled_waveform = resampler(waveform)
    return resampled_waveform


class transforms_construction():
    def __init__(self, sample_rate=16000, n_fft=1024, hop_length=512, n_mel=64, TwoD_nfft=256, TwoD_Hop=128):
        self.Sample_Rate = sample_rate
        self.N_FFT = n_fft
        self.Hop_Num = hop_length
        self.Mel_Num = n_mel
        self.TwoD_FFT = TwoD_nfft
        self.TwoD_Hop = TwoD_Hop

    def __transformation__(self, Type='Mel'):
        if Type == 'Mel':
            transformation = torchaudio.transforms.MelSpectrogram(sample_rate=self.Sample_Rate, n_fft=self.N_FFT, hop_length=self.Hop_Num, n_mels=self.Mel_Num)  # torch.Size([1, 64, 32])
        elif Type == 'Spec':
            transformation = torchaudio.transforms.Spectrogram(n_fft=self.TwoD_FFT, hop_length=self.TwoD_Hop, power=2, center=False, onesided=True)  # torch.Size([1, 129, 124])
        else:
            transformation = None
        return transformation


def loading_real_wave_noise(folder_name, sound_name):
    SAMPLE_WAV_SPEECH_PATH = os.path.join(folder_name, sound_name)
    waveform, sample_rate = torchaudio.load(SAMPLE_WAV_SPEECH_PATH)
    resample_rate = 16000
    waveform = resample_wav(waveform, sample_rate, resample_rate)
    return waveform, resample_rate


def waveform_to_spectrogram(waveform):
    waveform = minmaxscaler(waveform)  # min-max normalization
    transformation = transforms_construction().__transformation__(Type='Mel')
    spectrogram = transformation(waveform)
    spectrogram = librosa.core.power_to_db(spectrogram)  # convert to dB
    spectrogram = torch.from_numpy(spectrogram)
    return spectrogram

--------------------------------------------------------------------------------
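
A quick shape check of this preprocessing front end (not part of the repository): for 1 second of 16 kHz audio, the Mel transform above (n_fft=1024, hop_length=512, 64 Mel bins) yields a [1, 64, 32] log-Mel spectrogram, which is the input size the CNN expects.

    import torch
    from Loading_real_wave_noise_2D import waveform_to_spectrogram

    dummy = torch.randn(1, 16000)          # 1 second of noise at 16 kHz
    spec = waveform_to_spectrogram(dummy)  # log-Mel spectrogram
    print(spec.shape)                      # torch.Size([1, 64, 32])
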
/Acquired_sound.py:
--------------------------------------------------------------------------------
import pyaudio
import wave
from scipy import stats

from Loading_real_wave_noise_2D import loading_real_wave_noise
from Control_filter_selection import Control_filter_selection

# seconds: the duration of the recorded noise
# You can also set a threshold_db: start recording when the sound exceeds this amplitude

class AudioRecorder:
    def __init__(self, seconds=1, chunk=1000, sample_format=pyaudio.paInt24, channels=1, fs=16000, input_device_index=1):
        self.seconds = seconds
        self.chunk = chunk
        self.sample_format = sample_format
        self.channels = channels
        self.fs = fs
        self.input_device_index = input_device_index
        self.p = pyaudio.PyAudio()
        self.stream = self.p.open(format=self.sample_format,
                                  channels=self.channels,
                                  rate=self.fs,
                                  frames_per_buffer=self.chunk,
                                  input=True,
                                  input_device_index=self.input_device_index)

    def record(self, filename):
        # Record `seconds` of noise from the microphone
        frames = []
        for i in range(0, int(self.fs / self.chunk * self.seconds)):
            data = self.stream.read(self.chunk)
            frames.append(data)

        self.stream.stop_stream()
        self.stream.close()
        self.p.terminate()

        # Save the recording as a wav file
        wf = wave.open(filename, 'wb')
        wf.setnchannels(self.channels)
        wf.setsampwidth(self.p.get_sample_size(self.sample_format))
        wf.setframerate(self.fs)
        wf.writeframes(b''.join(frames))
        wf.close()

        # Load the recorded noise
        waveform, resample_rate = loading_real_wave_noise(folder_name='', sound_name=filename)

        # Predict a control-filter index for each 1-second frame using the SFANC CNN
        id_vector = Control_filter_selection(fs=16000, Primary_noise=waveform)  # Primary_noise: torch.Size([1, XX])

        # Select the most frequent index (mode) among the per-frame predictions
        mode = stats.mode(id_vector)
        ID = mode.mode[0]

        return ID

--------------------------------------------------------------------------------
/Control_filter_selection.py:
--------------------------------------------------------------------------------
import torch

from Modified_ShufflenetV2 import Modified_ShufflenetV2
from Loading_real_wave_noise_2D import waveform_to_spectrogram


def load_weights_for_model(model, pretrained_path):
    # Copy the pretrained coefficients into the model, key by key
    model_dict = model.state_dict()
    pretrained_dict = torch.load(pretrained_path, map_location="cpu")
    for k, v in model_dict.items():
        model_dict[k] = pretrained_dict[k]
    model.load_state_dict(model_dict)


def minmaxscaler(data):
    # Normalize the waveform by its dynamic range
    d_min = data.min()
    d_max = data.max()
    return data / (d_max - d_min)


def Casting_multiple_time_length_of_primary_noise(primary_noise, fs):
    assert primary_noise.shape[0] == 1, 'The dimension of the primary noise should be [1 x samples] !!!'
    cast_len = primary_noise.shape[1] - primary_noise.shape[1] % fs
    return primary_noise[:, :cast_len]  # truncate so the length of primary_noise is an integer multiple of fs


# -------------------------------------------------------------
# Class: Control_filter_Index_predictor
# -------------------------------------------------------------
class Control_filter_Index_predictor():

    def __init__(self, MODEL_PATH, device, fs):
        self.device = device
        # Set up the model
        model = Modified_ShufflenetV2(num_classes=7)
        model = model.to(self.device)
        # Load the pretrained coefficients
        load_weights_for_model(model, MODEL_PATH)
        model.eval()

        self.model = model
        self.fs = fs

    def predict_ID(self, noise):  # predict the noise index for one 1-second frame
        spectrogram = waveform_to_spectrogram(noise)  # 2D, torch.Size([1, 64, 32])
        spectrogram = spectrogram.to(self.device)
        spectrogram = spectrogram.unsqueeze(0)  # torch.Size([1, 1, 64, 32])
        prediction = self.model(spectrogram)  # torch.Size([1, 7])
        pred = torch.argmax(prediction).item()
        return pred

    def predict_ID_vector(self, primary_noise):
        # Check the shape of the primary noise
        assert primary_noise.shape[0] == 1, 'The dimension of the primary noise should be [1 x samples] !!!'
        assert primary_noise.shape[1] % self.fs == 0, 'The length of the primary noise is not an integer multiple of fs.'

        # Compute how many seconds the primary noise contains
        Time_len = int(primary_noise.shape[1] / self.fs)

        # Build the matrix of the primary noise [times x 1 x fs]
        primary_noise_vectors = primary_noise.reshape(Time_len, self.fs).unsqueeze(1)

        # Classify the noise in each frame whose length is 1 second
        ID_vector = []
        for ii in range(Time_len):
            ID_vector.append(self.predict_ID(primary_noise_vectors[ii]))
        return ID_vector


def Control_filter_selection(fs=16000, Primary_noise=None):
    # Path of the pretrained CNN model
    MODEL_PATH = 'ShuffleNetV2_Synthetic.pth'
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    pretrained_control_filter_ID_predictor = Control_filter_Index_predictor(MODEL_PATH=MODEL_PATH, device=device, fs=fs)

    Primary_noise = Casting_multiple_time_length_of_primary_noise(Primary_noise, fs=fs)

    Id_vector = pretrained_control_filter_ID_predictor.predict_ID_vector(Primary_noise)

    return Id_vector

--------------------------------------------------------------------------------
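
To make the frame splitting and majority vote concrete, here is a small illustration (not part of the repository; the ID values are hypothetical). A noise of n whole seconds is reshaped into n one-second frames of shape [1, fs], each frame is classified by the CNN, and the most frequent index wins:

    import torch
    from scipy import stats

    fs = 16000
    noise = torch.randn(1, 3 * fs + 777)             # 3 s of noise plus a leftover tail
    cast_len = noise.shape[1] - noise.shape[1] % fs  # drop the tail, as in Casting_multiple_time_length_of_primary_noise
    frames = noise[:, :cast_len].reshape(3, fs).unsqueeze(1)  # torch.Size([3, 1, 16000])

    id_vector = [2, 2, 5]         # hypothetical per-second predictions
    mode = stats.mode(id_vector)  # majority vote -> 2
    # On older SciPy versions mode.mode is an array (hence mode.mode[0] in Acquired_sound.py);
    # on newer versions it is a scalar.
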
/README.md:
--------------------------------------------------------------------------------
# CNN-based SFANC implemented in ANC Window

This repository contains the code for the paper "**Real-time Implementation and Explainable AI Analysis of Delayless CNN-based Selective Fixed-filter Active Noise Control**," accepted by the *Mechanical Systems and Signal Processing* journal. The paper is available on [ResearchGate](https://www.researchgate.net/publication/379371184_Real-time_implementation_and_explainable_AI_analysis_of_delayless_CNN-based_selective_fixed-filter_active_noise_control) and [Elsevier](https://www.sciencedirect.com/science/article/pii/S0888327024002620).

## Video of Experiment Results
- [Active Noise Control Window based on SFANC](https://youtu.be/K1pWeNLMoDM)

## Highlights
1. Implements CNN-based selective fixed-filter active noise control (SFANC) in a multichannel ANC window for delayless noise reduction.
2. Abstracts ANC as a Markov process and provides a theoretical analysis to validate the CNN-based SFANC method.
3. Utilizes an explainable AI technique, LayerCAM, to visually explain the decision-making process in the CNN-based SFANC method.
4. Demonstrates effective attenuation of various low-frequency noises and good transferability through simulations and real-time experiments.

## Usage
### Pre-trained CNN
- A synthetic dataset of 80,000 noise instances, each 1 second long, is used to train the CNN. The instances are generated by filtering white noise through bandpass filters with randomly chosen center frequencies and bandwidths (see the sketch after this list).
- The code and pre-trained model of the CNN are provided in `Modified_ShufflenetV2.py` and `ShuffleNetV2_Synthetic.pth`, respectively.
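
As a rough, self-contained sketch of this kind of synthetic noise generation (not the repository's actual data pipeline; the Butterworth filter order and the frequency ranges below are assumptions):

```python
import numpy as np
from scipy.signal import butter, lfilter

fs = 16000  # sampling rate (Hz), matching the repository
rng = np.random.default_rng()

center = rng.uniform(100, 7000)    # assumed range of center frequencies (Hz)
bandwidth = rng.uniform(50, 2000)  # assumed range of bandwidths (Hz)
low = max(center - bandwidth / 2, 20)
high = min(center + bandwidth / 2, fs / 2 - 1)

# Band-limit 1 s of white noise with a Butterworth bandpass filter (order is an assumption)
b, a = butter(4, [low / (fs / 2), high / (fs / 2)], btype='band')
noise = lfilter(b, a, rng.standard_normal(fs)).astype(np.float32)
```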

### Training dataset
The noise dataset used to train the CNN is available at [Training dataset](https://drive.google.com/file/d/12e2gh_eCCLUMIu6qbzn_qEXCslLWL_JJ/view?usp=sharing).

### Real-time Implementation
- 7 pre-trained control filters are obtained in the 4-channel ANC window with 1 reference microphone, 4 secondary sources, and 4 error sensors. 7 broadband noises with different frequency ranges (as shown in the figure below) are used as primary noises to obtain the corresponding pre-trained control filters.

*(Figure: frequency ranges of the 7 broadband primary noises.)*

- During real-time noise control, the laptop runs the code `Main_SFANC_Window.ipynb` to transmit the index of the selected control filter to the PXIe processing unit.

### Applying to New Environments
- To use the CNN-based SFANC method in new acoustic environments, obtain the corresponding pre-trained control filters in the new acoustic paths. The trained CNN in the CNN-based SFANC method can remain unchanged. For more details, please refer to the paper.

## Related Works
- [Delayless Generative Fixed-filter Active Noise Control based on Deep Learning and Bayesian Filter](https://ieeexplore.ieee.org/document/10339836/)
- [Deep Generative Fixed-Filter Active Noise Control](https://arxiv.org/pdf/2303.05788)
- [GFANC-Kalman: Generative Fixed-Filter Active Noise Control with CNN-Kalman Filtering](https://ieeexplore.ieee.org/document/10323505)
- [Unsupervised-GFANC: Unsupervised Learning Based End-to-End Delayless Generative Fixed-Filter Active Noise Control](https://arxiv.org/pdf/2402.09460.pdf)
- [A Hybrid SFANC-FxNLMS Algorithm for Active Noise Control Based on Deep Learning](https://arxiv.org/pdf/2208.08082)
- [Performance Evaluation of Selective Fixed-filter Active Noise Control Based on Different Convolutional Neural Networks](https://arxiv.org/pdf/2208.08440)

**If you are interested in our works, please consider citing our papers. Thanks! Have a great day!**
--------------------------------------------------------------------------------