├── output.wav
├── ShuffleNetV2_Synthetic.pth
├── UDP_pxie_connector.py
├── Modified_ShufflenetV2.py
├── Main_SFANC_Window.ipynb
├── Loading_real_wave_noise_2D.py
├── Acquired_sound.py
├── Control_filter_selection.py
└── README.md

/output.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Luo-Zhengding/SFANC-Window/HEAD/output.wav
--------------------------------------------------------------------------------
/ShuffleNetV2_Synthetic.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Luo-Zhengding/SFANC-Window/HEAD/ShuffleNetV2_Synthetic.pth
--------------------------------------------------------------------------------
/UDP_pxie_connector.py:
--------------------------------------------------------------------------------
import socket


class UDP_sender():
    """Minimal UDP client that passes the selected control-filter ID to the
    PXIe real-time controller."""

    def __init__(self, IpAddress, Port):
        self.ipaddress = IpAddress
        self.port = Port
        self.serverAddressPort = (IpAddress, Port)
        self.UDPClientSocket = socket.socket(family=socket.AF_INET, type=socket.SOCK_DGRAM)

    def send_message(self, text):
        # Encode the text (e.g. a filter ID such as "3") and send it as one UDP datagram.
        bytesToSend = str.encode(text)
        self.UDPClientSocket.sendto(bytesToSend, self.serverAddressPort)
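
# A hedged usage sketch (not part of the original file): send an initial
# control-filter ID to the PXIe controller. The IP address and port are the
# values used in Main_SFANC_Window.ipynb; adapt them to your hardware.
if __name__ == "__main__":
    sender = UDP_sender("192.168.1.103", 61557)
    sender.send_message("0")  # filter IDs are transmitted as plain text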
--------------------------------------------------------------------------------
/Modified_ShufflenetV2.py:
--------------------------------------------------------------------------------
import torchvision.models as models
import torch.nn as nn


class Modified_ShufflenetV2(nn.Module):

    def __init__(self, num_classes):
        super().__init__()

        # Map the single-channel spectrogram to the 3 input channels expected
        # by ShuffleNet, using two learned 1x1 convolutions.
        self.bw2col = nn.Sequential(
            nn.BatchNorm2d(1),
            nn.Conv2d(1, 10, 1, padding=0), nn.ReLU(),
            nn.Conv2d(10, 3, 1, padding=0), nn.ReLU())

        self.mv2 = models.shufflenet_v2_x0_5(pretrained=True)  # shufflenet_v2_x0_5 pre-trained on ImageNet

        # Change the output channels of the final convolution stage from 1024 to 512.
        self.mv2.conv5 = nn.Sequential(
            nn.Conv2d(192, 512, 1, 1, bias=False),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),)

        self.mv2.fc = nn.Linear(512, num_classes)  # replace the fully connected layer

    def forward(self, x):
        x = self.bw2col(x)
        x = self.mv2(x)
        return x
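
# A minimal smoke-test sketch (not part of the original file): run a dummy
# mel-spectrogram batch through the network. The input shape [1, 1, 64, 32]
# matches the Mel transform in Loading_real_wave_noise_2D.py, and 7 classes
# matches the pretrained checkpoint ShuffleNetV2_Synthetic.pth.
if __name__ == "__main__":
    import torch
    net = Modified_ShufflenetV2(num_classes=7)
    net.eval()
    with torch.no_grad():
        logits = net(torch.randn(1, 1, 64, 32))
    print(logits.shape)  # torch.Size([1, 7])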
--------------------------------------------------------------------------------
/Main_SFANC_Window.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "10d793cb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Refer to the paper 'Real-time implementation and explainable AI analysis of delayless CNN-based selective fixed-filter active noise control'.\n",
    "# Before running this code, you need a computer microphone and a real-time controller.\n",
    "\n",
    "from Acquired_sound import AudioRecorder\n",
    "from UDP_pxie_connector import UDP_sender\n",
    "\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")\n",
    "\n",
    "\n",
    "ID1 = 0  # initial ID\n",
    "# Pass the ID to the PXIe (real-time controller).\n",
    "Ipaddress = \"192.168.1.103\"  # !!! PXIe IP\n",
    "Port = 61557  # !!! PXIe port\n",
    "UDP_connection = UDP_sender(Ipaddress, Port)\n",
    "UDP_connection.send_message(text=str(ID1))\n",
    "print(\"The ID of the initial control filter is:\", ID1)\n",
    "\n",
    "\n",
    "while True:\n",
    "    recorder = AudioRecorder(seconds=1)\n",
    "    ID = recorder.record(\"output.wav\")\n",
    "\n",
    "    if ID != ID1:  # If the filter index changes, pass it to the PXIe.\n",
    "        ID1 = ID\n",
    "        print(\"The ID of the selected control filter is:\", ID1)\n",
    "        UDP_connection.send_message(text=str(ID1))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
--------------------------------------------------------------------------------
/Loading_real_wave_noise_2D.py:
--------------------------------------------------------------------------------
import os
import torch
import torchaudio
import torchaudio.transforms as T
import librosa


def minmaxscaler(data):
    # Scale by the data range; note that the minimum is deliberately not
    # subtracted, matching the normalization the CNN checkpoint was trained with.
    data_min = data.min()
    data_max = data.max()
    return data / (data_max - data_min)


def resample_wav(waveform, sample_rate, resample_rate):
    resampler = T.Resample(sample_rate, resample_rate, dtype=waveform.dtype)
    resampled_waveform = resampler(waveform)
    return resampled_waveform


class transforms_construction():
    def __init__(self, sample_rate=16000, n_fft=1024, hop_length=512, n_mel=64, TwoD_nfft=256, TwoD_Hop=128):
        self.Sample_Rate = sample_rate
        self.N_FFT = n_fft
        self.Hop_Num = hop_length
        self.Mel_Num = n_mel
        self.TwoD_FFT = TwoD_nfft
        self.TwoD_Hop = TwoD_Hop

    def build_transformation(self, Type='Mel'):
        if Type == 'Mel':
            transformation = torchaudio.transforms.MelSpectrogram(sample_rate=self.Sample_Rate, n_fft=self.N_FFT, hop_length=self.Hop_Num, n_mels=self.Mel_Num)  # output: torch.Size([1, 64, 32])
        elif Type == 'Spec':
            transformation = torchaudio.transforms.Spectrogram(n_fft=self.TwoD_FFT, hop_length=self.TwoD_Hop, power=2, center=False, onesided=True)  # output: torch.Size([1, 129, 124])
        else:
            transformation = None
        return transformation


def loading_real_wave_noise(folder_name, sound_name):
    SAMPLE_WAV_SPEECH_PATH = os.path.join(folder_name, sound_name)
    waveform, sample_rate = torchaudio.load(SAMPLE_WAV_SPEECH_PATH)
    resample_rate = 16000
    waveform = resample_wav(waveform, sample_rate, resample_rate)
    return waveform, resample_rate


def waveform_to_spectrogram(waveform):
    waveform = minmaxscaler(waveform)  # range normalization
    transformation = transforms_construction().build_transformation(Type='Mel')
    spectrogram = transformation(waveform)
    spectrogram = librosa.power_to_db(spectrogram.numpy())  # convert power to dB
    spectrogram = torch.from_numpy(spectrogram)
    return spectrogram
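
# A hedged usage sketch (not part of the original file): load a recorded wav
# file and convert its first second into the mel-spectrogram fed to the CNN.
# "output.wav" is the file written by Acquired_sound.AudioRecorder.
if __name__ == "__main__":
    waveform, fs = loading_real_wave_noise(folder_name='', sound_name='output.wav')
    spec = waveform_to_spectrogram(waveform[:, :fs])  # first second only
    print(spec.shape)  # expected: torch.Size([1, 64, 32])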
--------------------------------------------------------------------------------
/Acquired_sound.py:
--------------------------------------------------------------------------------
import pyaudio
import wave
from scipy import stats

from Loading_real_wave_noise_2D import loading_real_wave_noise
from Control_filter_selection import Control_filter_selection

# seconds: the duration of the recorded noise.
# You can also set a threshold_db so that recording only starts once the input
# is louder than that amplitude (see the sketch at the end of this file).

class AudioRecorder:
    def __init__(self, seconds=1, chunk=1000, sample_format=pyaudio.paInt24, channels=1, fs=16000, input_device_index=1):
        self.seconds = seconds
        self.chunk = chunk
        self.sample_format = sample_format
        self.channels = channels
        self.fs = fs
        self.input_device_index = input_device_index
        self.p = pyaudio.PyAudio()
        self.stream = self.p.open(format=self.sample_format,
                        channels=self.channels,
                        rate=self.fs,
                        frames_per_buffer=self.chunk,
                        input=True,
                        input_device_index=self.input_device_index)

    def record(self, filename):
        # Record `seconds` of noise from the microphone.
        frames = []
        for i in range(0, int(self.fs / self.chunk * self.seconds)):
            data = self.stream.read(self.chunk)
            frames.append(data)

        self.stream.stop_stream()
        self.stream.close()
        self.p.terminate()

        # Write the recording to a wav file.
        wf = wave.open(filename, 'wb')
        wf.setnchannels(self.channels)
        wf.setsampwidth(self.p.get_sample_size(self.sample_format))
        wf.setframerate(self.fs)
        wf.writeframes(b''.join(frames))
        wf.close()

        # Load the recorded noise.
        sound_name = 'output'
        waveform, resample_rate = loading_real_wave_noise(folder_name='', sound_name=sound_name+'.wav')

        # Predict one control-filter index per 1-second frame using the SFANC network.
        id_vector = Control_filter_selection(fs=16000, Primary_noise=waveform)  # Primary_noise: torch.Size([1, XX])

        # Take the mode (most frequent index) over the predicted IDs.
        mode = stats.mode(id_vector)
        ID = mode.mode[0]  # with older SciPy versions, .mode is an array

        return ID
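
# A hedged sketch (not part of the original file) of the threshold_db idea
# mentioned above: block until the input level exceeds a threshold before
# recording starts. threshold_db and the 16-bit capture format here are
# illustrative assumptions, not values from the original code.
def wait_for_sound(threshold_db=-30.0, chunk=1000, fs=16000):
    import audioop, math  # audioop is available up to Python 3.12
    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=fs,
                    frames_per_buffer=chunk, input=True)
    try:
        while True:
            data = stream.read(chunk)
            rms = audioop.rms(data, 2)  # 2 bytes per sample for paInt16
            level_db = 20 * math.log10(max(rms, 1) / 32768)
            if level_db > threshold_db:
                return  # loud enough: start recording
    finally:
        stream.stop_stream()
        stream.close()
        p.terminate()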
--------------------------------------------------------------------------------
/Control_filter_selection.py:
--------------------------------------------------------------------------------
import torch

from Modified_ShufflenetV2 import Modified_ShufflenetV2
from Loading_real_wave_noise_2D import waveform_to_spectrogram


def load_weights_for_model(model, pretrained_path):
    # Copy the pretrained parameters into the model, key by key.
    model_dict = model.state_dict()
    pretrained_dict = torch.load(pretrained_path, map_location="cpu")
    for k, v in model_dict.items():
        model_dict[k] = pretrained_dict[k]
    model.load_state_dict(model_dict)


def Casting_multiple_time_length_of_primary_noise(primary_noise, fs):
    assert primary_noise.shape[0] == 1, 'The dimension of the primary noise should be [1 x samples] !!!'
    cast_len = primary_noise.shape[1] - primary_noise.shape[1] % fs
    return primary_noise[:, :cast_len]  # truncate so the length is an integer multiple of fs


#-------------------------------------------------------------
# Class : Control_filter_Index_predictor
#-------------------------------------------------------------
class Control_filter_Index_predictor():

    def __init__(self, MODEL_PATH, device, fs):
        self.device = device
        # Set up the model.
        model = Modified_ShufflenetV2(num_classes=7)
        model = model.to(self.device)
        # Load the pretrained coefficients.
        load_weights_for_model(model, MODEL_PATH)
        model.eval()

        self.model = model
        self.fs = fs

    def predict_ID(self, noise):  # predict the filter index for one 1-second frame
        spectrogram = waveform_to_spectrogram(noise)  # 2D: torch.Size([1, 64, 32])
        spectrogram = spectrogram.to(self.device)
        spectrogram = spectrogram.unsqueeze(0)  # torch.Size([1, 1, 64, 32])
        prediction = self.model(spectrogram)  # torch.Size([1, 7])
        pred = torch.argmax(prediction).item()
        return pred

    def predict_ID_vector(self, primary_noise):
        # Check the shape of the primary noise.
        assert primary_noise.shape[0] == 1, 'The dimension of the primary noise should be [1 x samples] !!!'
        assert primary_noise.shape[1] % self.fs == 0, 'The length of the primary noise is not an integer multiple of fs.'

        # Compute how many seconds the primary noise contains.
        Time_len = int(primary_noise.shape[1] / self.fs)

        # Build the matrix of the primary noise: [times x 1 x fs].
        primary_noise_vectors = primary_noise.reshape(Time_len, self.fs).unsqueeze(1)

        # Classify the noise in each 1-second frame.
        ID_vector = []
        for ii in range(Time_len):
            ID_vector.append(self.predict_ID(primary_noise_vectors[ii]))
        return ID_vector


def Control_filter_selection(fs=16000, Primary_noise=None):
    # Path of the pretrained CNN model.
    MODEL_PATH = 'ShuffleNetV2_Synthetic.pth'
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    Pre_trained_control_filter_ID_predictor = Control_filter_Index_predictor(MODEL_PATH=MODEL_PATH, device=device, fs=fs)

    Primary_noise = Casting_multiple_time_length_of_primary_noise(Primary_noise, fs=fs)

    Id_vector = Pre_trained_control_filter_ID_predictor.predict_ID_vector(Primary_noise)

    return Id_vector
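
# A hedged usage sketch (not part of the original file): classify a recorded
# wav file into per-second control-filter IDs. "output.wav" is the file
# produced by Acquired_sound.AudioRecorder.
if __name__ == "__main__":
    from Loading_real_wave_noise_2D import loading_real_wave_noise
    waveform, fs = loading_real_wave_noise(folder_name='', sound_name='output.wav')
    ids = Control_filter_selection(fs=fs, Primary_noise=waveform)
    print("Per-second control-filter IDs:", ids)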
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# CNN-based SFANC implemented in ANC Window

This repository contains the code for the paper "**Real-time Implementation and Explainable AI Analysis of Delayless CNN-based Selective Fixed-filter Active Noise Control**," accepted by the *Mechanical Systems and Signal Processing* journal. The paper is available on [ResearchGate](https://www.researchgate.net/publication/379371184_Real-time_implementation_and_explainable_AI_analysis_of_delayless_CNN-based_selective_fixed-filter_active_noise_control) and [Elsevier](https://www.sciencedirect.com/science/article/pii/S0888327024002620).
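
A minimal sketch of the real-time loop (mirroring `Main_SFANC_Window.ipynb`), assuming a working microphone and a PXIe controller reachable at the address below:

```python
from Acquired_sound import AudioRecorder
from UDP_pxie_connector import UDP_sender

udp = UDP_sender("192.168.1.103", 61557)  # PXIe address and port; adapt to your setup
current_id = 0
udp.send_message(str(current_id))         # announce the initial control filter

while True:
    new_id = AudioRecorder(seconds=1).record("output.wav")  # record 1 s and classify
    if new_id != current_id:  # notify the controller only when the filter changes
        current_id = new_id
        udp.send_message(str(current_id))
```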