├── Macaques
│   └── UNetCPP.png
├── Human-Speech-and-Music
│   ├── Asteroid-Experimentation
│   │   ├── model.png
│   │   ├── Notebooks
│   │   │   ├── example.wav
│   │   │   └── example2.wav
│   │   └── Recipes
│   │       ├── Asteroid_V1.ipynb
│   │       └── Asteroid_V0.ipynb
│   └── Experimenting-with-Asteroid.ipynb
├── environment.yml
├── .gitignore
├── Bookshelf
│   └── Library.md
└── README.md

/Macaques/UNetCPP.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bs/cocktail-party-problem/master/Macaques/UNetCPP.png
--------------------------------------------------------------------------------
/Human-Speech-and-Music/Asteroid-Experimentation/model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bs/cocktail-party-problem/master/Human-Speech-and-Music/Asteroid-Experimentation/model.png
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: fastai2
2 | channels:
3 |   - fastai
4 |   - pytorch
5 |   - defaults
6 | dependencies:
7 |   - jupyter
8 |   - pandas
9 |   - fastprogress>=0.1.22
10 |   - pip
--------------------------------------------------------------------------------
/Human-Speech-and-Music/Asteroid-Experimentation/Notebooks/example.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bs/cocktail-party-problem/master/Human-Speech-and-Music/Asteroid-Experimentation/Notebooks/example.wav
--------------------------------------------------------------------------------
/Human-Speech-and-Music/Asteroid-Experimentation/Notebooks/example2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bs/cocktail-party-problem/master/Human-Speech-and-Music/Asteroid-Experimentation/Notebooks/example2.wav
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | Human-Speech-and-Music/Asteroid-Experimentation/MiniLibriMix
2 | */.ipynb_checkpoints/*
3 | Human-Speech-and-Music/Asteroid-Experimentation/Recipes/asteroid
4 | Human-Speech-and-Music/sound-separation/datasets/fuss/data
5 | Human-Speech-and-Music/Asteroid-Experimentation/Notebooks/MiniLibriMix
6 | Human-Speech-and-Music/Asteroid-Experimentation/Notebooks/lightning_logs
7 | */lightning_logs/*
8 | Human-Speech-and-Music/Asteroid-Experimentation/lightning_logs/
9 | .ipynb_checkpoints
10 | Human-Speech-and-Music/Miscellaneous/sound-separation
11 | 
--------------------------------------------------------------------------------
/Human-Speech-and-Music/Experimenting-with-Asteroid.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": 1,
6 |    "metadata": {},
7 |    "outputs": [
8 |     {
9 |      "data": {
10 |       "text/plain": [
11 |        "'0.3.3'"
12 |       ]
13 |      },
14 |      "execution_count": 1,
15 |      "metadata": {},
16 |      "output_type": "execute_result"
17 |     }
18 |    ],
19 |    "source": [
20 |     "import asteroid\n",
21 |     "asteroid.__version__"
22 |    ]
23 |   }
24 |  ],
25 |  "metadata": {
26 |   "kernelspec": {
27 |    "display_name": "Python 3",
28 |    "language": "python",
29 |    "name": "python3"
30 |   },
31 |   "language_info": {
32 |    "codemirror_mode": {
33 |     "name": "ipython",
34 |     "version": 3
35 |    },
36 |    "file_extension": ".py",
37 | 
"mimetype": "text/x-python", 38 | "name": "python", 39 | "nbconvert_exporter": "python", 40 | "pygments_lexer": "ipython3", 41 | "version": "3.7.6" 42 | } 43 | }, 44 | "nbformat": 4, 45 | "nbformat_minor": 4 46 | } 47 | -------------------------------------------------------------------------------- /Human-Speech-and-Music/Asteroid-Experimentation/Recipes/Asteroid_V1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import asteroid" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 3, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "data": { 19 | "text/plain": [ 20 | "['ConvTasNet',\n", 21 | " 'DPRNNTasNet',\n", 22 | " 'DPTNet',\n", 23 | " 'DeMask',\n", 24 | " 'LSTMTasNet',\n", 25 | " '__all__',\n", 26 | " '__builtins__',\n", 27 | " '__cached__',\n", 28 | " '__doc__',\n", 29 | " '__file__',\n", 30 | " '__loader__',\n", 31 | " '__name__',\n", 32 | " '__package__',\n", 33 | " '__path__',\n", 34 | " '__spec__',\n", 35 | " '__version__',\n", 36 | " 'deprecation_utils',\n", 37 | " 'dsp',\n", 38 | " 'filterbanks',\n", 39 | " 'losses',\n", 40 | " 'masknn',\n", 41 | " 'models',\n", 42 | " 'pathlib',\n", 43 | " 'project_root',\n", 44 | " 'show_available_models',\n", 45 | " 'torch_utils',\n", 46 | " 'utils']" 47 | ] 48 | }, 49 | "execution_count": 3, 50 | "metadata": {}, 51 | "output_type": "execute_result" 52 | } 53 | ], 54 | "source": [ 55 | "dir(asteroid)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 4, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "text/plain": [ 66 | "['T_destination',\n", 67 | " '__annotations__',\n", 68 | " '__call__',\n", 69 | " '__class__',\n", 70 | " '__delattr__',\n", 71 | " '__dict__',\n", 72 | " '__dir__',\n", 73 | " '__doc__',\n", 74 | " '__eq__',\n", 75 | " '__format__',\n", 76 | " '__ge__',\n", 77 | " '__getattr__',\n", 78 | " '__getattribute__',\n", 79 | " '__gt__',\n", 80 | " '__hash__',\n", 81 | " '__init__',\n", 82 | " '__init_subclass__',\n", 83 | " '__le__',\n", 84 | " '__lt__',\n", 85 | " '__module__',\n", 86 | " '__ne__',\n", 87 | " '__new__',\n", 88 | " '__reduce__',\n", 89 | " '__reduce_ex__',\n", 90 | " '__repr__',\n", 91 | " '__setattr__',\n", 92 | " '__setstate__',\n", 93 | " '__sizeof__',\n", 94 | " '__str__',\n", 95 | " '__subclasshook__',\n", 96 | " '__weakref__',\n", 97 | " '_apply',\n", 98 | " '_call_impl',\n", 99 | " '_forward_unimplemented',\n", 100 | " '_get_name',\n", 101 | " '_load_from_state_dict',\n", 102 | " '_named_members',\n", 103 | " '_register_load_state_dict_pre_hook',\n", 104 | " '_register_state_dict_hook',\n", 105 | " '_replicate_for_data_parallel',\n", 106 | " '_save_to_state_dict',\n", 107 | " '_separate',\n", 108 | " '_slow_forward',\n", 109 | " '_version',\n", 110 | " 'add_module',\n", 111 | " 'apply',\n", 112 | " 'bfloat16',\n", 113 | " 'buffers',\n", 114 | " 'children',\n", 115 | " 'cpu',\n", 116 | " 'cuda',\n", 117 | " 'double',\n", 118 | " 'dump_patches',\n", 119 | " 'eval',\n", 120 | " 'extra_repr',\n", 121 | " 'file_separate',\n", 122 | " 'float',\n", 123 | " 'forward',\n", 124 | " 'from_pretrained',\n", 125 | " 'get_model_args',\n", 126 | " 'get_state_dict',\n", 127 | " 'half',\n", 128 | " 'load_state_dict',\n", 129 | " 'modules',\n", 130 | " 'named_buffers',\n", 131 | " 'named_children',\n", 132 | " 'named_modules',\n", 133 | " 'named_parameters',\n", 134 | " 
'numpy_separate',\n", 135 | " 'parameters',\n", 136 | " 'register_backward_hook',\n", 137 | " 'register_buffer',\n", 138 | " 'register_forward_hook',\n", 139 | " 'register_forward_pre_hook',\n", 140 | " 'register_parameter',\n", 141 | " 'requires_grad_',\n", 142 | " 'separate',\n", 143 | " 'serialize',\n", 144 | " 'share_memory',\n", 145 | " 'state_dict',\n", 146 | " 'to',\n", 147 | " 'torch_separate',\n", 148 | " 'train',\n", 149 | " 'type',\n", 150 | " 'zero_grad']" 151 | ] 152 | }, 153 | "execution_count": 4, 154 | "metadata": {}, 155 | "output_type": "execute_result" 156 | } 157 | ], 158 | "source": [ 159 | "dir(asteroid.ConvTasNet)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [] 168 | } 169 | ], 170 | "metadata": { 171 | "kernelspec": { 172 | "display_name": "Python 3", 173 | "language": "python", 174 | "name": "python3" 175 | }, 176 | "language_info": { 177 | "codemirror_mode": { 178 | "name": "ipython", 179 | "version": 3 180 | }, 181 | "file_extension": ".py", 182 | "mimetype": "text/x-python", 183 | "name": "python", 184 | "nbconvert_exporter": "python", 185 | "pygments_lexer": "ipython3", 186 | "version": "3.7.6" 187 | } 188 | }, 189 | "nbformat": 4, 190 | "nbformat_minor": 4 191 | } 192 | -------------------------------------------------------------------------------- /Bookshelf/Library.md: -------------------------------------------------------------------------------- 1 | # Key Resources 2 | 1. [The “Cocktail Party Problem”: What Is It? How Can It Be Solved? And Why Should Animal Behaviorists Study It?](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2692487/) 3 | 2. [Asteroid: The PyTorch-Based Audio Source Separation Toolkit for Researchers](https://arxiv.org/abs/2005.04132) 4 | 3. [Wavesplit: End-to-End Speech Separation by Speaker Clustering](https://arxiv.org/abs/2002.08933) 5 | 4. [Dual-Path Transformer Network: Direct Context-Aware Modeling for End-to-End Monaural Speech Separation](https://arxiv.org/abs/2007.13975) 6 | 5. [Voice Separation with an Unknown Number of Multiple Speakers](https://arxiv.org/abs/2003.01531) 7 | 6. [Separation of Overlapping Sources in Bioacoustic Mixtures](https://asa.scitation.org/doi/10.1121/10.0000932) 8 | 7. [Separating Overlapping Bat Calls with a Bi-Directional Long Short-Term Memory Network](https://www.biorxiv.org/content/10.1101/2019.12.15.876656v1) 9 | 8. [Music Source Separation in the Waveform Domain](https://arxiv.org/abs/1911.13254) 10 | 9. [Deep Audio Prior](https://arxiv.org/abs/1912.10292) 11 | 10. [Deep Learning for Monaural Speech Separation](https://paris.cs.illinois.edu/pubs/huang-icassp2014.pdf) 12 | 11. [The Separation of Overlapped Dolphin Signature Whistle Based on Blind Source Separation](https://ieeexplore.ieee.org/document/8242534) 13 | 12. [A Comparative Study of Blind Source Separation for Bioacoustics Sounds based on FastICA, PCA and NMF](https://www.sciencedirect.com/science/article/pii/S1877050918312468?via%3Dihub) 14 | 13. [Investigating Deep Neural Transformations for Spectrogram-Based Musical Source Separation](https://arxiv.org/abs/1912.02591) 15 | 14. [A Physiologically Inspired Model for Solving the Cocktail Party Problem](https://link.springer.com/content/pdf/10.1007/s10162-019-00732-4.pdf) 16 | 15. [Audio Source Separation with Discriminative Scattering Networks](https://arxiv.org/abs/1412.7022) 17 | 16. 
[Blind Nonnegative Source Separation Using Biological Neural Networks](https://arxiv.org/abs/1706.00382) 18 | 19 | # Miscellaneous Key Resources 20 | 1. [End-to-End Overlapped Speech Detection and Speaker Counting with Raw Waveform](https://ieeexplore.ieee.org/document/9003962) 21 | 2. [Perceptual and Neural Mechanisms of Auditory Scene Analysis in the European Starling](https://link.springer.com/chapter/10.1007/978-3-319-48690-1_3) 22 | 3. [Neuromorphic Model for Sound Source Segregation](https://drum.lib.umd.edu/handle/1903/18155) 23 | 4. [Efficient Trainable Front-Ends for Neural Speech Enhancement](https://arxiv.org/abs/2002.09286) 24 | 5. [Finding Strength in Weakness: Learning to Separate Sounds with Weak Supervision](https://arxiv.org/abs/1911.02182) 25 | 6. [Monaural Source Separation Based on Sequentially Trained LSTMs in Real Room Environments](https://ieeexplore.ieee.org/document/8902640) 26 | 7. [The Phasebook: Building Complex Masks via Discrete Representations for Source Separation](https://waseda.pure.elsevier.com/en/publications/the-phasebook-building-complex-masks-via-discrete-representations) 27 | 8. [Universal Sound Separation](https://arxiv.org/abs/1905.03330) 28 | 9. [Speaker-independent Speech Separation with Deep Attractor Network](https://arxiv.org/abs/1707.03634) 29 | 10. [Deep Clustering and Conventional Networks for Music Separation: Stronger Together](https://arxiv.org/abs/1611.06265) 30 | 11. [Phase-Sensitive and Recognition-Boosted Speech Separation Using Deep Recurrent Neural Networks](https://ieeexplore.ieee.org/document/7178061) 31 | 12. [Joint Optimization of Masks and Deep Recurrent Neural Networks for Monaural Source Separation](https://arxiv.org/abs/1502.04149) 32 | 13. [Conv-TasNet: Surpassing Ideal Time-Frequency Magnitude Masking for Speech Separation](https://arxiv.org/abs/1809.07454) 33 | 14. [Deep Learning Based Phase Reconstruction for Speaker Separation: A Trigonometric Perspective](https://arxiv.org/abs/1811.09010) 34 | 15. [Divide and Conquer: A Deep CASA Approach to Talker-independent Monaural Speaker Separation](https://arxiv.org/abs/1904.11148) 35 | 16. [Dual-Path RNN: Efficient Long Sequence Modeling for Time-Domain Single-Channel Speech Separation](https://arxiv.org/abs/1910.06379) 36 | 17. [A Comprehensive Study of Speech Separation: Spectrogram vs Waveform Separation](https://arxiv.org/abs/1905.07497) 37 | 18. [FurcaNeXt: End-to-End Monaural Speech Separation with Dynamic Gated Dilated Temporal Convolutional Networks](https://arxiv.org/abs/1902.04891) 38 | 19. [Improving Universal Sound Separation Using Sound Classification](https://arxiv.org/abs/1911.07951) 39 | 20. [SpEx: Multi-Scale Time Domain Speaker Extraction Network](https://arxiv.org/abs/2004.08326) 40 | 21. [Meta-Learning Extractors for Music Source Separation](https://arxiv.org/abs/2002.07016) 41 | 22. [Unsupervised Learning of Semantic Audio Representations](https://arxiv.org/abs/1711.02209) 42 | 23. [Sudo rm -rf: Efficient Networks for Universal Audio Source Separation](https://arxiv.org/abs/2007.06833) 43 | 24. [Unsupervised Sound Separation Using Mixtures of Mixtures](https://arxiv.org/abs/2006.12701) 44 | 25. [Listen to What You Want: Neural Network-based Universal Sound Selector](https://arxiv.org/abs/2006.05712) 45 | 26. [Speech Separation Based on Multi-Stage Elaborated Dual-Path Deep BiLSTM with Auxiliary Identity Loss](https://arxiv.org/abs/2008.03149) 46 | 27. 
[Identify Speakers in Cocktail Parties with End-to-End Attention](https://arxiv.org/abs/2005.11408)
47 | 
48 | # Miscellaneous Concepts
49 | 1. Matched Filters
50 | 2. Hilbert-Huang Transforms
51 | 3. Lombard Effect
52 | 4. Dip-Listening Hypothesis
53 | 5. Noise-Invariant, Rule-Encoding, and Temporal Edge Neurons
54 | 6. FastICA (Independent Component Analysis), Point Source Separation (PSS), Non-Negative Matrix Factorization (NMF)
55 | 7. Cortical Representation of Speech, Auditory Object Analysis
56 | 8. Deep Scattering Spectrum, Kymatio, Invariant Scattering Convolution Networks
57 | 9. Error-Gated Hebbian Rule
58 | 10. Probabilistic Latent Component Analysis (PLCA), Robust PCA (RPCA), Low-Rank Modeling
59 | 11. Progressive Learning
60 | 12. Denoising Autoencoders
61 | 13. Temporal Convolutional Networks (TCNs)
62 | 14. Perfect Reconstruction Filterbank (PRFB), Gammatone Filterbank
63 | 15. Deep Feature Losses, Short-Time Objective Intelligibility (STOI), Perceptual Evaluation of Speech Quality (PESQ)
64 | 16. Wiener Filtering
65 | 17. Permutation Invariant Training
66 | 18. Supervised Independent Vector Analysis
67 | 19. Phase Spectra and Group Delay
68 | 20. Spectrogram Fusion
69 | 21. Self-Organizing Background Subtraction
70 | 22. Phase-Aware Signal Processing
71 | 23. Deep Clustering, Deep Attractor, and Selective Hearing Networks
72 | 24. Multimodal/Crossmodal Fusion
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Cocktail Party Problem
2 | 
3 | ## Background
4 | The cocktail party problem (CPP) refers to the challenge of perceiving speech in noisy social settings. In human applications, the CPP has been well characterized, and the fundamental question involves addressing how individuals recognize what another is saying even in the presence of multiple overlapping speakers. Upon receiving a composite sound pressure waveform resulting from the acoustic interference of (1) multiple signalers, (2) abiotic sources of noise, and (3) other biotic sources of noise, the human auditory system parses the incoming signal, integrating and segregating the stimulus into perceptually coherent and functional representations consisting of auditory objects, images, and/or streams via complex neural circuitry. Acoustic cues that enable the human auditory system to solve the CPP include harmonicity of signals, temporal synchrony of overlapping signals, amplitude modulation rates, and spatial localization computed using interaural time differences (ITDs) and interaural level differences (ILDs). Numerous studies have aimed to address the cocktail party problem in human speech and music applications, typically employing deep machine learning (ML) techniques, often with reasonably high levels of success. Others have studied the CPP by relying on multiple microphones (which is impractical for most bioacoustics applications) or by employing unsupervised methods, the most popular of which is independent component analysis (ICA). While such publications are available, a harder (and more relevant) challenge is ‘single-channel source separation’, in which the dataset contains mixed audio signals recorded using a single microphone.
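To ground the contrast between the multi-microphone and single-channel settings, here is a minimal sketch (not code from this repository) of the classical ICA baseline, assuming two synchronized microphone channels, synthetic stand-in signals, and scikit-learn's FastICA:

```python
# Minimal sketch of the classical multi-microphone ICA baseline.
# The signals are synthetic stand-ins, not data from this repository.
import numpy as np
from sklearn.decomposition import FastICA

rng = np.random.default_rng(0)
t = np.linspace(0, 1, 8000)                # 1 s at a nominal 8 kHz
s1 = np.sin(2 * np.pi * 440 * t)           # stand-in "vocalization" 1
s2 = np.sign(np.sin(2 * np.pi * 97 * t))   # stand-in "vocalization" 2
S = np.c_[s1, s2]                          # (n_samples, n_sources)

A = np.array([[1.0, 0.5],                  # unknown mixing: two microphones,
              [0.4, 1.0]])                 # each hearing both sources
X = S @ A.T + 0.01 * rng.standard_normal((len(t), 2))

ica = FastICA(n_components=2, random_state=0)
S_hat = ica.fit_transform(X)               # sources recovered up to scale/permutation
```

With only a single microphone, `X` collapses to one column and the mixing matrix is no longer identifiable, which is precisely why the single-channel CPP calls for the learned separation methods discussed below.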
5 | 
6 | In contrast, animal studies addressing the CPP remain comparatively understudied, with few conclusive attempts at offering solutions available in the literature. Similar to its human counterpart, the fundamental non-human bioacoustics CPP question involves investigating how (1) animals detect and recognize conspecifics, (2) localize signalers, (3) discriminate among call types, and (4) extract meaningful information even when multiple conspecifics and heterospecifics are signaling simultaneously.
7 | 
8 | ## Approach
9 | In order to address the CPP in non-human applications, I propose a low-level approach consisting of two sequential phases:
10 | 
11 | 1. Understanding and implementing existing ML-based solutions designed to address human speech or music source separation
12 | 2. Modifying the results from Phase 1 to address the non-human CPP
13 | 
14 | While numerous studies have explored the human CPP, yielding promising results, the non-human CPP has garnered relatively little interest and thus remains understudied. The difficulty of the non-human CPP is exacerbated by the challenges of procuring relevant datasets, especially since it is often infeasible for trained experts to manually annotate recordings involving overlapping signals. This means that obtaining ground-truth responses could be significantly harder in non-human studies than in human CPP studies. Additionally, while the neural mechanisms in the human auditory system responsible for discriminating overlapping signals are relatively well understood, it remains largely unresolved whether non-human auditory processing systems involve similar mechanisms.
15 | 
16 | ## Datasets
17 | 
18 | 1. WHAM! and WHAMR!
19 | 2. WSJ0-2mix and WSJ0-3mix
20 | 3. LibriMix
21 | 4. Microsoft DNS Challenge
22 | 5. SMS_WSJ
23 | 6. MUSDB18
24 | 7. FUSS
25 | 8. AVSpeech
26 | 9. Kinect-WSJ
27 | 
28 | For Phase 2, a starting point could involve synthesizing artificial datasets by compiling multiple recordings of distinct individual signalers into a single audio stream (a minimal synthesis sketch follows the list below). A possible data source could be the Gero sperm whale DTAG data, since it contains clear signals that are annotated with the identity of the vocalizing whale. As we progress, it would be important to evaluate the performance of our methods on real-world data (as opposed to manipulated artificial datasets). With this in mind, possible data sources could include:
29 | 
30 | 1. The Yossi Yovel Egyptian bat dataset, given that the annotations include ‘Emitters’ and ‘Addressees’
31 | 2. The Diana Reiss, Marcelo Mancuso, and Andres Babino bow-riding data, since multi-modal learning could confer numerous advantages
32 | 3. The Michelle Fournet and Fred Sharpe humpback whale data
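As a concrete illustration of the synthesis idea above, the following is a hedged sketch (not code from this repository) of constructing one supervised training example from two single-signaler recordings; the file names are hypothetical placeholders, and the SNR convention is one reasonable choice among several:

```python
# Hypothetical sketch: build one bioacoustic separation example by summing
# two annotated single-signaler recordings at a chosen signal-to-noise ratio.
import numpy as np
import soundfile as sf

def mix_at_snr(a, b, snr_db):
    """Scale b so that a is snr_db louder (in power), then sum the pair."""
    n = min(len(a), len(b))               # truncate to the shorter clip
    a, b = a[:n], b[:n]
    pa, pb = np.mean(a ** 2), np.mean(b ** 2) + 1e-12
    b = b * np.sqrt(pa / (pb * 10 ** (snr_db / 10)))
    return a + b, a, b                    # mixture plus ground-truth sources

# 'signaler_A.wav' and 'signaler_B.wav' are hypothetical annotated recordings.
a, sr = sf.read("signaler_A.wav")
b, _ = sf.read("signaler_B.wav")
mixture, src_a, src_b = mix_at_snr(a, b, snr_db=0)
sf.write("mixture.wav", mixture, sr)      # model input; src_a/src_b are targets
```

A separation model would then be trained to map the mixture back to the two ground-truth sources, with permutation invariant training handling the arbitrary ordering of the sources.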
33 | 
34 | ## Problems
35 | 
36 | Prior to tackling this project, it is reasonable to anticipate a number of foreseeable problems (adapted from the ESP list of bioacoustics research questions). Some of these are biology-related, while others are ML-related, but all of the following will likely play some role in our exploration of the CPP.
37 | 
38 | 1. Representation of Bioacoustic Signals: some studies have indicated that employing Hilbert-Huang transforms (HHTs) and/or empirical mode decompositions (EMDs) could improve acoustic source separation
39 | 2. Variable Time Scales in Vocal Behavior: given that animal sounds include a variety of vocalizations ranging from transient broadband click-like pulses to extended tonal “song”, it is likely that a solution to the non-human CPP will have to account for multiple time scales even within the context of a single species
40 | 3. Neural Network Architectures: while CNNs have conventionally been used in speech and bioacoustics applications, it could be beneficial to investigate other architectures such as RNNs and/or transformers
41 | 
42 | ## Next Steps
43 | 
44 | The logical next step as we begin to address the non-human CPP is to become familiar with the existing techniques and approaches used to address human speech and music source separation. A particularly relevant starting point might be the Asteroid PyTorch-based audio source separation toolkit (see the sketch below). Several of the other key papers are listed on the Earth Species GitHub.
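To make that starting point concrete, here is a minimal sketch of running a pretrained Asteroid model on a two-source mixture. The `from_pretrained` and `separate` methods appear in the `dir(asteroid.ConvTasNet)` output recorded in Asteroid_V1.ipynb above, but the Hugging Face model identifier and the input file name below are illustrative placeholders rather than verified artifacts:

```python
# Hedged sketch: separate a two-source mixture with a pretrained Asteroid model.
# The model ID and the input file name are illustrative placeholders.
from asteroid import ConvTasNet

# Download a pretrained checkpoint from the Hugging Face hub (ID assumed).
model = ConvTasNet.from_pretrained("mpariente/ConvTasNet_WHAM_sepclean")

# Run separation on a mixture file; the estimated sources are saved to disk.
model.separate("example.wav")
```

If this runs as expected, the estimated sources should land next to the input (with asteroid 0.3.x, typically `example_est1.wav` and `example_est2.wav`), which would let us audition separation quality on synthetic bioacoustic mixtures immediately.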
45 | 
46 | ### 6-Week Agenda
47 | 
48 | Over the next six weeks, I propose that we focus our attention on understanding and implementing a number of existing techniques designed for human speech and music source separation. This will enable us both to develop a deeper understanding of ML-based audio source separation approaches and to gauge the performance of existing technologies. During this time, we can also aim to construct a number of bioacoustic source separation datasets using a variety of species and call types.
49 | 
50 | ## More Information
51 | 
52 | More information about this project can be found at the project [homepage](https://github.com/orgs/earthspecies/projects/5).
53 | 
--------------------------------------------------------------------------------
/Human-Speech-and-Music/Asteroid-Experimentation/Recipes/Asteroid_V0.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": null,
6 |    "metadata": {},
7 |    "outputs": [],
8 |    "source": [
9 |     "cd asteroid"
10 |    ]
11 |   },
12 |   {
13 |    "cell_type": "code",
14 |    "execution_count": null,
15 |    "metadata": {},
16 |    "outputs": [],
17 |    "source": [
18 |     "pip install -r requirements.txt"
19 |    ]
20 |   },
21 |   {
22 |    "cell_type": "code",
23 |    "execution_count": 12,
24 |    "metadata": {},
25 |    "outputs": [
26 |     {
27 |      "name": "stdout",
28 |      "output_type": "stream",
29 |      "text": [
30 |       "/home/peterbermant/Workspace/Cocktail-Party-Problem/Human-Speech-and-Music/Asteroid-Experimentation/Recipes/asteroid\n"
31 |      ]
32 |     }
33 |    ],
34 |    "source": [
35 |     "cd .."
36 |    ]
37 |   },
38 |   {
39 |    "cell_type": "code",
40 |    "execution_count": 1,
41 |    "metadata": {},
42 |    "outputs": [
43 |     {
44 |      "name": "stdout",
45 |      "output_type": "stream",
46 |      "text": [
47 |       "/home/peterbermant/Workspace/Cocktail-Party-Problem/Human-Speech-and-Music/Asteroid-Experimentation/Recipes/asteroid\n"
48 |      ]
49 |     }
50 |    ],
51 |    "source": [
52 |     "cd asteroid"
53 |    ]
54 |   },
55 |   {
56 |    "cell_type": "code",
57 |    "execution_count": 13,
58 |    "metadata": {},
59 |    "outputs": [
60 |     {
61 |      "name": "stdout",
62 |      "output_type": "stream",
63 |      "text": [
64 |       "/home/peterbermant/Workspace/Cocktail-Party-Problem/Human-Speech-and-Music/Asteroid-Experimentation/Recipes/asteroid/egs\n"
65 |      ]
66 |     }
67 |    ],
68 |    "source": [
69 |     "cd egs"
70 |    ]
71 |   },
72 |   {
73 |    "cell_type": "code",
74 |    "execution_count": 14,
75 |    "metadata": {},
76 |    "outputs": [
77 |     {
78 |      "name": "stdout",
79 |      "output_type": "stream",
80 |      "text": [
81 |       "\u001b[0m\u001b[01;34mavspeech\u001b[0m/ \u001b[01;34mdns_challenge\u001b[0m/ \u001b[01;34mkinect-wsj\u001b[0m/ \u001b[01;34mmusdb18\u001b[0m/ \u001b[01;34msms_wsj\u001b[0m/ \u001b[01;34mwham\u001b[0m/ \u001b[01;34mwsj0-mix\u001b[0m/\r\n",
82 |       "\u001b[01;34mdemask\u001b[0m/ \u001b[01;34mfuss\u001b[0m/ \u001b[01;34mlibrimix\u001b[0m/ README.md \u001b[01;34mtools\u001b[0m/ \u001b[01;34mwhamr\u001b[0m/\r\n"
83 |      ]
84 |     }
85 |    ],
86 |    "source": [
87 |     "ls"
88 |    ]
89 |   },
90 |   {
91 |    "cell_type": "code",
92 |    "execution_count": 15,
93 |    "metadata": {},
94 |    "outputs": [
95 |     {
96 |      "name": "stdout",
97 |      "output_type": "stream",
98 |      "text": [
99 |       "/home/peterbermant/Workspace/Cocktail-Party-Problem/Human-Speech-and-Music/Asteroid-Experimentation/Recipes/asteroid/egs/tools\n"
100 |      ]
101 |     }
102 |    ],
103 |    "source": [
104 |     "cd tools"
105 |    ]
106 |   },
107 |   {
108 |    "cell_type": "code",
109 |    "execution_count": 16,
110 |    "metadata": {},
111 |    "outputs": [
112 |     {
113 |      "name": "stdout",
114 |      "output_type": "stream",
115 |      "text": [
116 |       "\u001b[0m\u001b[01;34msph2pipe_v2.5\u001b[0m/ \u001b[01;31msph2pipe_v2.5.tar.gz\u001b[0m sph2pipe_v2.5.tar.gz.1\r\n"
117 |      ]
118 |     }
119 |    ],
120 |    "source": [
121 |     "ls"
122 |    ]
123 |   },
124 |   {
125 |    "cell_type": "code",
126 |    "execution_count": 19,
127 |    "metadata": {},
128 |    "outputs": [
129 |     {
130 |      "name": "stdout",
131 |      "output_type": "stream",
132 |      "text": [
133 |       "/home/peterbermant/Workspace/Cocktail-Party-Problem/Human-Speech-and-Music/Asteroid-Experimentation/Recipes/asteroid/egs/wham\n"
134 |      ]
135 |     }
136 |    ],
137 |    "source": [
138 |     "cd wham"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "code",
143 |    "execution_count": 20,
144 |    "metadata": {},
145 |    "outputs": [
146 |     {
147 |      "name": "stdout",
148 |      "output_type": "stream",
149 |      "text": [
150 |       "\u001b[0m\u001b[01;34mConvTasNet\u001b[0m/ \u001b[01;34mDPTNet\u001b[0m/ \u001b[01;34mFilterbankDesign\u001b[0m/ \u001b[01;34mTwoStep\u001b[0m/\r\n",
151 |       "\u001b[01;34mDPRNN\u001b[0m/ \u001b[01;34mDynamicMixing\u001b[0m/ README.md\r\n"
152 |      ]
153 |     }
154 |    ],
155 |    "source": [
156 |     "ls"
157 |    ]
158 |   },
159 |   {
160 |    "cell_type": "code",
161 |    "execution_count": 21,
162 |    "metadata": {},
163 |    "outputs": [
164 |     {
165 |      "name": "stdout",
166 |      "output_type": "stream",
167 |      "text": [
168 |       "/home/peterbermant/Workspace/Cocktail-Party-Problem/Human-Speech-and-Music/Asteroid-Experimentation/Recipes/asteroid/egs/wham/DPTNet\n"
169 |      ]
170 |     }
171 |    ],
172 |    "source": [
173 |     "cd DPTNet"
174 |    ]
175 |   },
176 |   {
177 |    "cell_type": "code",
178 |    "execution_count": 22,
179 | 
"metadata": {}, 180 | "outputs": [ 181 | { 182 | "name": "stdout", 183 | "output_type": "stream", 184 | "text": [ 185 | "eval.py \u001b[0m\u001b[01;34mlocal\u001b[0m/ README.md \u001b[01;32mrun.sh\u001b[0m* train.py \u001b[01;34mutils\u001b[0m/\r\n" 186 | ] 187 | } 188 | ], 189 | "source": [ 190 | "ls" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 23, 196 | "metadata": {}, 197 | "outputs": [ 198 | { 199 | "name": "stdout", 200 | "output_type": "stream", 201 | "text": [ 202 | "Results from the following experiment will be stored in exp/train_dptnet_sep_clean_8kmin_e52a4b27\n", 203 | "Stage 3: Training\n", 204 | "Traceback (most recent call last):\n", 205 | " File \"train.py\", line 145, in \n", 206 | "{'data': {'mode': 'min',\n", 207 | " 'nondefault_nsrc': None,\n", 208 | " 'sample_rate': 8000,\n", 209 | " 'segment': 2.0,\n", 210 | " 'task': 'sep_clean',\n", 211 | " 'train_dir': 'data/wav8k/min/tr',\n", 212 | " 'valid_dir': 'data/wav8k/min/cv'},\n", 213 | " 'filterbank': {'kernel_size': 16, 'n_filters': 64, 'stride': 8},\n", 214 | " 'main_args': {'exp_dir': 'exp/train_dptnet_sep_clean_8kmin_e52a4b27/',\n", 215 | " 'help': None},\n", 216 | " 'masknet': {'bidirectional': True,\n", 217 | " 'chunk_size': 100,\n", 218 | " 'dropout': 0,\n", 219 | " 'ff_activation': 'relu',\n", 220 | " 'ff_hid': 256,\n", 221 | " 'hop_size': 50,\n", 222 | " 'in_chan': 64,\n", 223 | " 'mask_act': 'sigmoid',\n", 224 | " 'n_repeats': 2,\n", 225 | " 'n_src': 2,\n", 226 | " 'norm_type': 'gLN',\n", 227 | " 'out_chan': 64},\n", 228 | " 'optim': {'lr': 0.001, 'optimizer': 'adam', 'weight_decay': 1e-05},\n", 229 | " 'positional arguments': {},\n", 230 | " 'scheduler': {'d_model': 64, 'steps_per_epoch': 10000},\n", 231 | " 'training': {'batch_size': 4,\n", 232 | " 'early_stop': True,\n", 233 | " 'epochs': 200,\n", 234 | " 'gradient_clipping': 5,\n", 235 | " 'half_lr': True,\n", 236 | " 'num_workers': 4}}\n", 237 | " main(arg_dic)\n", 238 | " File \"train.py\", line 35, in main\n", 239 | " nondefault_nsrc=conf[\"data\"][\"nondefault_nsrc\"],\n", 240 | " File \"/opt/conda/lib/python3.7/site-packages/asteroid/data/wham_dataset.py\", line 98, in __init__\n", 241 | " with open(mix_json, \"r\") as f:\n", 242 | "FileNotFoundError: [Errno 2] No such file or directory: 'data/wav8k/min/tr/mix_clean.json'\n" 243 | ] 244 | } 245 | ], 246 | "source": [ 247 | "!. 
./run.sh" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": null, 260 | "metadata": {}, 261 | "outputs": [], 262 | "source": [] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": {}, 275 | "outputs": [], 276 | "source": [] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": null, 281 | "metadata": {}, 282 | "outputs": [], 283 | "source": [] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 1, 288 | "metadata": {}, 289 | "outputs": [ 290 | { 291 | "name": "stdout", 292 | "output_type": "stream", 293 | "text": [ 294 | "/home/peterbermant/Workspace/Cocktail-Party-Problem/Human-Speech-and-Music/Asteroid-Experimentation/Recipes/asteroid\n" 295 | ] 296 | } 297 | ], 298 | "source": [ 299 | "cd asteroid" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": 3, 305 | "metadata": {}, 306 | "outputs": [ 307 | { 308 | "name": "stdout", 309 | "output_type": "stream", 310 | "text": [ 311 | "/home/peterbermant/Workspace/Cocktail-Party-Problem/Human-Speech-and-Music/Asteroid-Experimentation/Recipes/asteroid/egs/wsj0-mix\n" 312 | ] 313 | } 314 | ], 315 | "source": [ 316 | "cd egs/wsj0-mix" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 4, 322 | "metadata": {}, 323 | "outputs": [ 324 | { 325 | "name": "stdout", 326 | "output_type": "stream", 327 | "text": [ 328 | "\u001b[0m\u001b[01;34mDeepClustering\u001b[0m/ README.md\r\n" 329 | ] 330 | } 331 | ], 332 | "source": [ 333 | "ls" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": 5, 339 | "metadata": {}, 340 | "outputs": [ 341 | { 342 | "name": "stdout", 343 | "output_type": "stream", 344 | "text": [ 345 | "/home/peterbermant/Workspace/Cocktail-Party-Problem/Human-Speech-and-Music/Asteroid-Experimentation/Recipes/asteroid/egs/wsj0-mix/DeepClustering\n" 346 | ] 347 | } 348 | ], 349 | "source": [ 350 | "cd DeepClustering" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": 6, 356 | "metadata": {}, 357 | "outputs": [ 358 | { 359 | "name": "stdout", 360 | "output_type": "stream", 361 | "text": [ 362 | "eval.py model.py requirements.txt train.py\r\n", 363 | "\u001b[0m\u001b[01;34mlocal\u001b[0m/ README.md \u001b[01;32mrun.sh\u001b[0m* \u001b[01;36mutils\u001b[0m@\r\n" 364 | ] 365 | } 366 | ], 367 | "source": [ 368 | "ls" 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": 7, 374 | "metadata": {}, 375 | "outputs": [ 376 | { 377 | "name": "stdout", 378 | "output_type": "stream", 379 | "text": [ 380 | "Results from the following experiment will be stored in exp/train_chimera_2sep_8kmin_7a1ce493\n", 381 | "Stage 3: Training\n", 382 | "/opt/conda/lib/python3.7/site-packages/asteroid/masknn/blocks.py:8: VisibleDeprecationWarning: `blocks` has been splited between `convolutional` and `recurrent` since asteroid v0.2.0 and will be removed in v0.3.0\n", 383 | " VisibleDeprecationWarning,\n", 384 | "Traceback (most recent call last):\n", 385 | " File \"train.py\", line 204, in \n", 386 | "{'data': {'n_src': 2,\n", 387 | " 'sample_rate': 8000,\n", 388 | " 'train_dir': 'data/2speakers/wav8k/min/tr',\n", 389 | " 'valid_dir': 'data/2speakers/wav8k/min/cv'},\n", 390 | " 
'filterbank': {'kernel_size': 256, 'n_filters': 256, 'stride': 64},\n", 391 | " 'main_args': {'exp_dir': 'exp/train_chimera_2sep_8kmin_7a1ce493/',\n", 392 | " 'help': None},\n", 393 | " 'masknet': {'dropout': 0.3,\n", 394 | " 'embedding_dim': 40,\n", 395 | " 'hidden_size': 600,\n", 396 | " 'n_layers': 4,\n", 397 | " 'rnn_type': 'lstm',\n", 398 | " 'take_log': True},\n", 399 | " 'optim': {'lr': 0.0001, 'optimizer': 'rmsprop', 'weight_decay': 0.0},\n", 400 | " 'positional arguments': {},\n", 401 | " 'training': {'batch_size': 32,\n", 402 | " 'early_stop': True,\n", 403 | " 'epochs': 200,\n", 404 | " 'half_lr': True,\n", 405 | " 'loss_alpha': 1.0,\n", 406 | " 'num_workers': 8}}\n", 407 | " main(arg_dic)\n", 408 | " File \"train.py\", line 27, in main\n", 409 | " train_loader, val_loader = make_dataloaders(**conf[\"data\"], **conf[\"training\"])\n", 410 | " File \"/opt/conda/lib/python3.7/site-packages/asteroid/data/wsj0_mix.py\", line 20, in make_dataloaders\n", 411 | " train_set = Wsj0mixDataset(train_dir, n_src=n_src, sample_rate=sample_rate, segment=segment)\n", 412 | " File \"/opt/conda/lib/python3.7/site-packages/asteroid/data/wsj0_mix.py\", line 64, in __init__\n", 413 | " with open(mix_json, \"r\") as f:\n", 414 | "FileNotFoundError: [Errno 2] No such file or directory: 'data/2speakers/wav8k/min/tr/mix.json'\n" 415 | ] 416 | } 417 | ], 418 | "source": [ 419 | "!. ./run.sh" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": null, 425 | "metadata": {}, 426 | "outputs": [], 427 | "source": [] 428 | }, 429 | { 430 | "cell_type": "code", 431 | "execution_count": null, 432 | "metadata": {}, 433 | "outputs": [], 434 | "source": [] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": null, 439 | "metadata": {}, 440 | "outputs": [], 441 | "source": [] 442 | }, 443 | { 444 | "cell_type": "code", 445 | "execution_count": null, 446 | "metadata": {}, 447 | "outputs": [], 448 | "source": [] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": null, 453 | "metadata": {}, 454 | "outputs": [], 455 | "source": [] 456 | }, 457 | { 458 | "cell_type": "code", 459 | "execution_count": null, 460 | "metadata": {}, 461 | "outputs": [], 462 | "source": [] 463 | }, 464 | { 465 | "cell_type": "code", 466 | "execution_count": null, 467 | "metadata": {}, 468 | "outputs": [], 469 | "source": [] 470 | }, 471 | { 472 | "cell_type": "code", 473 | "execution_count": null, 474 | "metadata": {}, 475 | "outputs": [], 476 | "source": [] 477 | }, 478 | { 479 | "cell_type": "code", 480 | "execution_count": 3, 481 | "metadata": {}, 482 | "outputs": [ 483 | { 484 | "name": "stdout", 485 | "output_type": "stream", 486 | "text": [ 487 | "/home/peterbermant/Workspace/Cocktail-Party-Problem/Human-Speech-and-Music/Asteroid-Experimentation/Recipes/asteroid\n" 488 | ] 489 | } 490 | ], 491 | "source": [ 492 | "cd asteroid" 493 | ] 494 | }, 495 | { 496 | "cell_type": "code", 497 | "execution_count": 4, 498 | "metadata": {}, 499 | "outputs": [ 500 | { 501 | "name": "stdout", 502 | "output_type": "stream", 503 | "text": [ 504 | "/home/peterbermant/Workspace/Cocktail-Party-Problem/Human-Speech-and-Music/Asteroid-Experimentation/Recipes/asteroid/egs/wham/ConvTasNet\n" 505 | ] 506 | } 507 | ], 508 | "source": [ 509 | "cd egs/wham/ConvTasNet" 510 | ] 511 | }, 512 | { 513 | "cell_type": "code", 514 | "execution_count": 5, 515 | "metadata": {}, 516 | "outputs": [ 517 | { 518 | "name": "stdout", 519 | "output_type": "stream", 520 | "text": [ 521 | "Stage 0: Converting sphere files to wav 
files\n", 522 | "Download sph2pipe_v2.5 into egs/tools\n", 523 | "--2020-09-15 16:59:20-- http://www.openslr.org/resources/3/sph2pipe_v2.5.tar.gz\n", 524 | "Resolving www.openslr.org (www.openslr.org)... 46.101.158.64\n", 525 | "Connecting to www.openslr.org (www.openslr.org)|46.101.158.64|:80... connected.\n", 526 | "HTTP request sent, awaiting response... 200 OK\n", 527 | "Length: 329832 (322K) [application/x-gzip]\n", 528 | "Saving to: ‘../../tools/sph2pipe_v2.5.tar.gz’\n", 529 | "\n", 530 | "sph2pipe_v2.5.tar.g 100%[===================>] 322.10K 377KB/s in 0.9s \n", 531 | "\n", 532 | "2020-09-15 16:59:21 (377 KB/s) - ‘../../tools/sph2pipe_v2.5.tar.gz’ saved [329832/329832]\n", 533 | "\n", 534 | "sph2pipe_v2.5\n", 535 | "sph2pipe_v2.5/0readme.1st\n", 536 | "sph2pipe_v2.5/bitshift.h\n", 537 | "sph2pipe_v2.5/file_headers.c\n", 538 | "sph2pipe_v2.5/shorten_x.c\n", 539 | "sph2pipe_v2.5/sph2pipe.c\n", 540 | "sph2pipe_v2.5/sph_convert.h\n", 541 | "sph2pipe_v2.5/test\n", 542 | "sph2pipe_v2.5/test/123_1pcbe_shn.sph\n", 543 | "sph2pipe_v2.5/test/123_1pcle_shn.sph\n", 544 | "sph2pipe_v2.5/test/123_1ulaw_shn.sph\n", 545 | "sph2pipe_v2.5/test/123_2alaw.sph\n", 546 | "sph2pipe_v2.5/test/123_2pcbe_shn.sph\n", 547 | "sph2pipe_v2.5/test/123_2pcle_shn.sph\n", 548 | "sph2pipe_v2.5/test/123_2ulaw_shn.sph\n", 549 | "sph2pipe_v2.5/test/big1.hdr\n", 550 | "sph2pipe_v2.5/test/big2.hdr\n", 551 | "sph2pipe_v2.5/test/outfile-md5.list.macosx\n", 552 | "sph2pipe_v2.5/test/std1.hdr\n", 553 | "sph2pipe_v2.5/test/std2.hdr\n", 554 | "sph2pipe_v2.5/test/test_all.pl\n", 555 | "sph2pipe_v2.5/ulaw.h\n", 556 | "sph2pipe_v2.5/sph2pipe.exe\n", 557 | "\u001b[01m\u001b[Ksph2pipe_v2.5/file_headers.c:\u001b[m\u001b[K In function ‘\u001b[01m\u001b[Kcopyshort\u001b[m\u001b[K’:\n", 558 | "\u001b[01m\u001b[Ksph2pipe_v2.5/file_headers.c:326:2:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[Kimplicit declaration of function ‘\u001b[01m\u001b[Kswab\u001b[m\u001b[K’ [\u001b[01;35m\u001b[K-Wimplicit-function-declaration\u001b[m\u001b[K]\n", 559 | " \u001b[01;35m\u001b[Kswab\u001b[m\u001b[K((char *) &val, short_order.ch, 2 );\n", 560 | " \u001b[01;35m\u001b[K^~~~\u001b[m\u001b[K\n", 561 | "\u001b[01m\u001b[Ksph2pipe_v2.5/file_headers.c:\u001b[m\u001b[K At top level:\n", 562 | "\u001b[01m\u001b[Ksph2pipe_v2.5/file_headers.c:579:1:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[Kreturn type defaults to ‘\u001b[01m\u001b[Kint\u001b[m\u001b[K’ [\u001b[01;35m\u001b[K-Wimplicit-int\u001b[m\u001b[K]\n", 563 | " \u001b[01;35m\u001b[KConvertToIeeeExtended\u001b[m\u001b[K(num, bytes)\n", 564 | " \u001b[01;35m\u001b[K^~~~~~~~~~~~~~~~~~~~~\u001b[m\u001b[K\n", 565 | "\u001b[01m\u001b[Ksph2pipe_v2.5/shorten_x.c:\u001b[m\u001b[K In function ‘\u001b[01m\u001b[Kfwrite_type\u001b[m\u001b[K’:\n", 566 | "\u001b[01m\u001b[Ksph2pipe_v2.5/shorten_x.c:325:22:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[Kimplicit declaration of function ‘\u001b[01m\u001b[Kpcm2alaw\u001b[m\u001b[K’ [\u001b[01;35m\u001b[K-Wimplicit-function-declaration\u001b[m\u001b[K]\n", 567 | " *writebufp++ = \u001b[01;35m\u001b[Kpcm2alaw\u001b[m\u001b[K( ulaw2pcm[data0[i]] );\n", 568 | " \u001b[01;35m\u001b[K^~~~~~~~\u001b[m\u001b[K\n", 569 | "\u001b[01m\u001b[Ksph2pipe_v2.5/shorten_x.c:381:24:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[Kimplicit declaration of function ‘\u001b[01m\u001b[Kpcm2ulaw\u001b[m\u001b[K’ [\u001b[01;35m\u001b[K-Wimplicit-function-declaration\u001b[m\u001b[K]\n", 570 | " *writebufp++ = 
\u001b[01;35m\u001b[Kpcm2ulaw\u001b[m\u001b[K( data0[i] );\n", 571 | " \u001b[01;35m\u001b[K^~~~~~~~\u001b[m\u001b[K\n", 572 | "\u001b[01m\u001b[Ksph2pipe_v2.5/shorten_x.c:464:6:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[Kimplicit declaration of function ‘\u001b[01m\u001b[Kswab\u001b[m\u001b[K’ [\u001b[01;35m\u001b[K-Wimplicit-function-declaration\u001b[m\u001b[K]\n", 573 | " \u001b[01;35m\u001b[Kswab\u001b[m\u001b[K(writebuf, writefub, sizeout * nchanout * nitem);\n", 574 | " \u001b[01;35m\u001b[K^~~~\u001b[m\u001b[K\n", 575 | "\u001b[01m\u001b[Ksph2pipe_v2.5/sph2pipe.c:\u001b[m\u001b[K In function ‘\u001b[01m\u001b[KgetUserOpts\u001b[m\u001b[K’:\n", 576 | "\u001b[01m\u001b[Ksph2pipe_v2.5/sph2pipe.c:191:18:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[Kimplicit declaration of function ‘\u001b[01m\u001b[Kgetopt\u001b[m\u001b[K’ [\u001b[01;35m\u001b[K-Wimplicit-function-declaration\u001b[m\u001b[K]\n", 577 | " while (( i = \u001b[01;35m\u001b[Kgetopt\u001b[m\u001b[K( ac, av, \"daupf:c:t:s:h:\" )) != EOF )\n", 578 | " \u001b[01;35m\u001b[K^~~~~~\u001b[m\u001b[K\n", 579 | "\u001b[01m\u001b[Ksph2pipe_v2.5/sph2pipe.c:\u001b[m\u001b[K In function ‘\u001b[01m\u001b[KcopySamples\u001b[m\u001b[K’:\n", 580 | "\u001b[01m\u001b[Ksph2pipe_v2.5/sph2pipe.c:537:3:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[Kimplicit declaration of function ‘\u001b[01m\u001b[Kswab\u001b[m\u001b[K’ [\u001b[01;35m\u001b[K-Wimplicit-function-declaration\u001b[m\u001b[K]\n", 581 | " \u001b[01;35m\u001b[Kswab\u001b[m\u001b[K( outbuf, inpbuf, nb ); /* it, do byte swapping too */\n", 582 | " \u001b[01;35m\u001b[K^~~~\u001b[m\u001b[K\n", 583 | "/home/peterbermant/Workspace/Cocktail-Party-Problem/Human-Speech-and-Music/Asteroid-Experimentation/Recipes/asteroid/egs/wham/ConvTasNet\n", 584 | "Convert sphere format to wav format\n", 585 | "find: unknown predicate `--wav_dir'\n", 586 | "Try 'find --help' for more information.\n" 587 | ] 588 | } 589 | ], 590 | "source": [ 591 | "!. 
./run.sh" 592 | ] 593 | }, 594 | { 595 | "cell_type": "code", 596 | "execution_count": null, 597 | "metadata": {}, 598 | "outputs": [], 599 | "source": [] 600 | }, 601 | { 602 | "cell_type": "code", 603 | "execution_count": null, 604 | "metadata": {}, 605 | "outputs": [], 606 | "source": [] 607 | }, 608 | { 609 | "cell_type": "code", 610 | "execution_count": null, 611 | "metadata": {}, 612 | "outputs": [], 613 | "source": [] 614 | }, 615 | { 616 | "cell_type": "code", 617 | "execution_count": null, 618 | "metadata": {}, 619 | "outputs": [], 620 | "source": [] 621 | }, 622 | { 623 | "cell_type": "code", 624 | "execution_count": null, 625 | "metadata": {}, 626 | "outputs": [], 627 | "source": [] 628 | }, 629 | { 630 | "cell_type": "code", 631 | "execution_count": null, 632 | "metadata": {}, 633 | "outputs": [], 634 | "source": [] 635 | }, 636 | { 637 | "cell_type": "code", 638 | "execution_count": null, 639 | "metadata": {}, 640 | "outputs": [], 641 | "source": [] 642 | }, 643 | { 644 | "cell_type": "code", 645 | "execution_count": null, 646 | "metadata": {}, 647 | "outputs": [], 648 | "source": [] 649 | }, 650 | { 651 | "cell_type": "code", 652 | "execution_count": null, 653 | "metadata": {}, 654 | "outputs": [], 655 | "source": [ 656 | "cd" 657 | ] 658 | }, 659 | { 660 | "cell_type": "code", 661 | "execution_count": null, 662 | "metadata": {}, 663 | "outputs": [], 664 | "source": [ 665 | "cd Workspace/Cocktail-Party-Problem/Human-Speech-and-Music/Asteroid-Experimentation/Recipes/Asteroid" 666 | ] 667 | }, 668 | { 669 | "cell_type": "code", 670 | "execution_count": null, 671 | "metadata": {}, 672 | "outputs": [], 673 | "source": [ 674 | "cd egs" 675 | ] 676 | }, 677 | { 678 | "cell_type": "code", 679 | "execution_count": null, 680 | "metadata": {}, 681 | "outputs": [], 682 | "source": [ 683 | "ls" 684 | ] 685 | }, 686 | { 687 | "cell_type": "code", 688 | "execution_count": null, 689 | "metadata": {}, 690 | "outputs": [], 691 | "source": [ 692 | "cd wham" 693 | ] 694 | }, 695 | { 696 | "cell_type": "code", 697 | "execution_count": null, 698 | "metadata": {}, 699 | "outputs": [], 700 | "source": [ 701 | "ls" 702 | ] 703 | }, 704 | { 705 | "cell_type": "code", 706 | "execution_count": null, 707 | "metadata": {}, 708 | "outputs": [], 709 | "source": [ 710 | "cd ConvTasNet" 711 | ] 712 | }, 713 | { 714 | "cell_type": "code", 715 | "execution_count": null, 716 | "metadata": {}, 717 | "outputs": [], 718 | "source": [ 719 | "ls" 720 | ] 721 | }, 722 | { 723 | "cell_type": "code", 724 | "execution_count": null, 725 | "metadata": {}, 726 | "outputs": [], 727 | "source": [ 728 | "!. 
./run.sh" 729 | ] 730 | }, 731 | { 732 | "cell_type": "code", 733 | "execution_count": null, 734 | "metadata": {}, 735 | "outputs": [], 736 | "source": [ 737 | "cd tmp" 738 | ] 739 | }, 740 | { 741 | "cell_type": "code", 742 | "execution_count": null, 743 | "metadata": {}, 744 | "outputs": [], 745 | "source": [ 746 | "ls" 747 | ] 748 | }, 749 | { 750 | "cell_type": "code", 751 | "execution_count": null, 752 | "metadata": {}, 753 | "outputs": [], 754 | "source": [ 755 | "cd wham_scripts" 756 | ] 757 | }, 758 | { 759 | "cell_type": "code", 760 | "execution_count": null, 761 | "metadata": {}, 762 | "outputs": [], 763 | "source": [ 764 | "ls" 765 | ] 766 | }, 767 | { 768 | "cell_type": "code", 769 | "execution_count": null, 770 | "metadata": {}, 771 | "outputs": [], 772 | "source": [] 773 | }, 774 | { 775 | "cell_type": "code", 776 | "execution_count": null, 777 | "metadata": {}, 778 | "outputs": [], 779 | "source": [] 780 | }, 781 | { 782 | "cell_type": "code", 783 | "execution_count": null, 784 | "metadata": {}, 785 | "outputs": [], 786 | "source": [] 787 | }, 788 | { 789 | "cell_type": "code", 790 | "execution_count": null, 791 | "metadata": {}, 792 | "outputs": [], 793 | "source": [] 794 | }, 795 | { 796 | "cell_type": "code", 797 | "execution_count": null, 798 | "metadata": {}, 799 | "outputs": [], 800 | "source": [] 801 | }, 802 | { 803 | "cell_type": "code", 804 | "execution_count": null, 805 | "metadata": {}, 806 | "outputs": [], 807 | "source": [] 808 | }, 809 | { 810 | "cell_type": "code", 811 | "execution_count": null, 812 | "metadata": {}, 813 | "outputs": [], 814 | "source": [] 815 | }, 816 | { 817 | "cell_type": "code", 818 | "execution_count": null, 819 | "metadata": {}, 820 | "outputs": [], 821 | "source": [] 822 | }, 823 | { 824 | "cell_type": "code", 825 | "execution_count": null, 826 | "metadata": {}, 827 | "outputs": [], 828 | "source": [] 829 | }, 830 | { 831 | "cell_type": "code", 832 | "execution_count": null, 833 | "metadata": {}, 834 | "outputs": [], 835 | "source": [ 836 | "cd Asteroid" 837 | ] 838 | }, 839 | { 840 | "cell_type": "code", 841 | "execution_count": null, 842 | "metadata": {}, 843 | "outputs": [], 844 | "source": [ 845 | "cd egs/librimix" 846 | ] 847 | }, 848 | { 849 | "cell_type": "code", 850 | "execution_count": null, 851 | "metadata": {}, 852 | "outputs": [], 853 | "source": [ 854 | "cd ConvTasNet" 855 | ] 856 | }, 857 | { 858 | "cell_type": "code", 859 | "execution_count": null, 860 | "metadata": {}, 861 | "outputs": [], 862 | "source": [ 863 | "!. 
./run.sh" 864 | ] 865 | }, 866 | { 867 | "cell_type": "code", 868 | "execution_count": null, 869 | "metadata": {}, 870 | "outputs": [], 871 | "source": [] 872 | }, 873 | { 874 | "cell_type": "code", 875 | "execution_count": null, 876 | "metadata": {}, 877 | "outputs": [], 878 | "source": [] 879 | }, 880 | { 881 | "cell_type": "code", 882 | "execution_count": null, 883 | "metadata": {}, 884 | "outputs": [], 885 | "source": [] 886 | }, 887 | { 888 | "cell_type": "code", 889 | "execution_count": null, 890 | "metadata": {}, 891 | "outputs": [], 892 | "source": [] 893 | }, 894 | { 895 | "cell_type": "code", 896 | "execution_count": null, 897 | "metadata": {}, 898 | "outputs": [], 899 | "source": [] 900 | }, 901 | { 902 | "cell_type": "code", 903 | "execution_count": null, 904 | "metadata": {}, 905 | "outputs": [], 906 | "source": [] 907 | }, 908 | { 909 | "cell_type": "code", 910 | "execution_count": null, 911 | "metadata": {}, 912 | "outputs": [], 913 | "source": [] 914 | }, 915 | { 916 | "cell_type": "code", 917 | "execution_count": null, 918 | "metadata": {}, 919 | "outputs": [], 920 | "source": [] 921 | }, 922 | { 923 | "cell_type": "code", 924 | "execution_count": null, 925 | "metadata": {}, 926 | "outputs": [], 927 | "source": [] 928 | }, 929 | { 930 | "cell_type": "code", 931 | "execution_count": null, 932 | "metadata": {}, 933 | "outputs": [], 934 | "source": [] 935 | }, 936 | { 937 | "cell_type": "code", 938 | "execution_count": null, 939 | "metadata": {}, 940 | "outputs": [], 941 | "source": [] 942 | }, 943 | { 944 | "cell_type": "code", 945 | "execution_count": null, 946 | "metadata": {}, 947 | "outputs": [], 948 | "source": [] 949 | }, 950 | { 951 | "cell_type": "code", 952 | "execution_count": null, 953 | "metadata": {}, 954 | "outputs": [], 955 | "source": [ 956 | "cd Asteroid" 957 | ] 958 | }, 959 | { 960 | "cell_type": "code", 961 | "execution_count": null, 962 | "metadata": {}, 963 | "outputs": [], 964 | "source": [ 965 | "ls" 966 | ] 967 | }, 968 | { 969 | "cell_type": "code", 970 | "execution_count": null, 971 | "metadata": {}, 972 | "outputs": [], 973 | "source": [ 974 | "cd egs/wham/ConvTasNet" 975 | ] 976 | }, 977 | { 978 | "cell_type": "code", 979 | "execution_count": null, 980 | "metadata": {}, 981 | "outputs": [], 982 | "source": [ 983 | "ls" 984 | ] 985 | }, 986 | { 987 | "cell_type": "code", 988 | "execution_count": null, 989 | "metadata": {}, 990 | "outputs": [], 991 | "source": [ 992 | "!. ./run.sh" 993 | ] 994 | }, 995 | { 996 | "cell_type": "code", 997 | "execution_count": null, 998 | "metadata": {}, 999 | "outputs": [], 1000 | "source": [ 1001 | "cd .." 1002 | ] 1003 | }, 1004 | { 1005 | "cell_type": "code", 1006 | "execution_count": null, 1007 | "metadata": {}, 1008 | "outputs": [], 1009 | "source": [ 1010 | "cd .." 1011 | ] 1012 | }, 1013 | { 1014 | "cell_type": "code", 1015 | "execution_count": null, 1016 | "metadata": {}, 1017 | "outputs": [], 1018 | "source": [ 1019 | "ls" 1020 | ] 1021 | }, 1022 | { 1023 | "cell_type": "code", 1024 | "execution_count": null, 1025 | "metadata": {}, 1026 | "outputs": [], 1027 | "source": [ 1028 | "cd wsj0-mix" 1029 | ] 1030 | }, 1031 | { 1032 | "cell_type": "code", 1033 | "execution_count": null, 1034 | "metadata": {}, 1035 | "outputs": [], 1036 | "source": [ 1037 | "ls" 1038 | ] 1039 | }, 1040 | { 1041 | "cell_type": "code", 1042 | "execution_count": null, 1043 | "metadata": {}, 1044 | "outputs": [], 1045 | "source": [ 1046 | "cd .." 
1047 | ] 1048 | }, 1049 | { 1050 | "cell_type": "code", 1051 | "execution_count": null, 1052 | "metadata": {}, 1053 | "outputs": [], 1054 | "source": [ 1055 | "ls" 1056 | ] 1057 | }, 1058 | { 1059 | "cell_type": "code", 1060 | "execution_count": null, 1061 | "metadata": {}, 1062 | "outputs": [], 1063 | "source": [ 1064 | "cd egs/librimix" 1065 | ] 1066 | }, 1067 | { 1068 | "cell_type": "code", 1069 | "execution_count": null, 1070 | "metadata": {}, 1071 | "outputs": [], 1072 | "source": [ 1073 | "cd ConvTasNet" 1074 | ] 1075 | }, 1076 | { 1077 | "cell_type": "code", 1078 | "execution_count": null, 1079 | "metadata": {}, 1080 | "outputs": [], 1081 | "source": [ 1082 | "ls" 1083 | ] 1084 | }, 1085 | { 1086 | "cell_type": "code", 1087 | "execution_count": null, 1088 | "metadata": {}, 1089 | "outputs": [], 1090 | "source": [ 1091 | "!. ./run.sh" 1092 | ] 1093 | } 1094 | ], 1095 | "metadata": { 1096 | "kernelspec": { 1097 | "display_name": "Python 3", 1098 | "language": "python", 1099 | "name": "python3" 1100 | }, 1101 | "language_info": { 1102 | "codemirror_mode": { 1103 | "name": "ipython", 1104 | "version": 3 1105 | }, 1106 | "file_extension": ".py", 1107 | "mimetype": "text/x-python", 1108 | "name": "python", 1109 | "nbconvert_exporter": "python", 1110 | "pygments_lexer": "ipython3", 1111 | "version": "3.7.6" 1112 | } 1113 | }, 1114 | "nbformat": 4, 1115 | "nbformat_minor": 4 1116 | } 1117 | --------------------------------------------------------------------------------