├── models
    ├── __init__.py
    ├── utils.py
    ├── convolutional_transformer.py
    └── model.py
├── docs
    ├── imgs
    │   └── model.png
    ├── favicons
    │   ├── favicon.ico
    │   ├── apple-icon.png
    │   ├── favicon-16x16.png
    │   ├── favicon-32x32.png
    │   ├── favicon-96x96.png
    │   ├── ms-icon-70x70.png
    │   ├── ms-icon-144x144.png
    │   ├── ms-icon-150x150.png
    │   ├── ms-icon-310x310.png
    │   ├── android-icon-36x36.png
    │   ├── android-icon-48x48.png
    │   ├── android-icon-72x72.png
    │   ├── android-icon-96x96.png
    │   ├── apple-icon-114x114.png
    │   ├── apple-icon-120x120.png
    │   ├── apple-icon-144x144.png
    │   ├── apple-icon-152x152.png
    │   ├── apple-icon-180x180.png
    │   ├── apple-icon-57x57.png
    │   ├── apple-icon-60x60.png
    │   ├── apple-icon-72x72.png
    │   ├── apple-icon-76x76.png
    │   ├── android-icon-144x144.png
    │   ├── android-icon-192x192.png
    │   ├── apple-icon-precomposed.png
    │   ├── browserconfig.xml
    │   └── manifest.json
    ├── wavs
    │   ├── apc-apc
    │   │   ├── seen-f2f.wav
    │   │   ├── seen-f2m.wav
    │   │   ├── seen-m2f.wav
    │   │   ├── seen-m2m.wav
    │   │   ├── unseen-f2f.wav
    │   │   ├── unseen-f2m.wav
    │   │   ├── unseen-m2f.wav
    │   │   └── unseen-m2m.wav
    │   ├── apc-cpc
    │   │   ├── seen-f2f.wav
    │   │   ├── seen-f2m.wav
    │   │   ├── seen-m2f.wav
    │   │   ├── seen-m2m.wav
    │   │   ├── unseen-f2f.wav
    │   │   ├── unseen-f2m.wav
    │   │   ├── unseen-m2f.wav
    │   │   └── unseen-m2m.wav
    │   ├── apc-mel
    │   │   ├── seen-f2f.wav
    │   │   ├── seen-f2m.wav
    │   │   ├── seen-m2f.wav
    │   │   ├── seen-m2m.wav
    │   │   ├── unseen-f2f.wav
    │   │   ├── unseen-f2m.wav
    │   │   ├── unseen-m2f.wav
    │   │   └── unseen-m2m.wav
    │   ├── apc-ppg
    │   │   ├── seen-f2f.wav
    │   │   ├── seen-f2m.wav
    │   │   ├── seen-m2f.wav
    │   │   ├── seen-m2m.wav
    │   │   ├── unseen-f2f.wav
    │   │   ├── unseen-f2m.wav
    │   │   ├── unseen-m2f.wav
    │   │   └── unseen-m2m.wav
    │   ├── apc-w2v
    │   │   ├── seen-f2f.wav
    │   │   ├── seen-f2m.wav
    │   │   ├── seen-m2f.wav
    │   │   ├── seen-m2m.wav
    │   │   ├── unseen-f2f.wav
    │   │   ├── unseen-f2m.wav
    │   │   ├── unseen-m2f.wav
    │   │   └── unseen-m2m.wav
    │   ├── cpc-apc
    │   │   ├── seen-f2f.wav
    │   │   ├── seen-f2m.wav
    │   │   ├── seen-m2f.wav
    │   │   ├── seen-m2m.wav
    │   │   ├── unseen-f2f.wav
    │   │   ├── unseen-f2m.wav
    │   │   ├── unseen-m2f.wav
    │   │   └── unseen-m2m.wav
    │   ├── cpc-cpc
    │   │   ├── seen-f2f.wav
    │   │   ├── seen-f2m.wav
    │   │   ├── seen-m2f.wav
    │   │   ├── seen-m2m.wav
    │   │   ├── unseen-f2f.wav
    │   │   ├── unseen-f2m.wav
    │   │   ├── unseen-m2f.wav
    │   │   └── unseen-m2m.wav
    │   ├── cpc-mel
    │   │   ├── seen-f2f.wav
    │   │   ├── seen-f2m.wav
    │   │   ├── seen-m2f.wav
    │   │   ├── seen-m2m.wav
    │   │   ├── unseen-f2f.wav
    │   │   ├── unseen-f2m.wav
    │   │   ├── unseen-m2f.wav
    │   │   └── unseen-m2m.wav
    │   ├── cpc-ppg
    │   │   ├── seen-f2f.wav
    │   │   ├── seen-f2m.wav
    │   │   ├── seen-m2f.wav
    │   │   ├── seen-m2m.wav
    │   │   ├── unseen-f2f.wav
    │   │   ├── unseen-f2m.wav
    │   │   ├── unseen-m2f.wav
    │   │   └── unseen-m2m.wav
    │   ├── cpc-w2v
    │   │   ├── seen-f2f.wav
    │   │   ├── seen-f2m.wav
    │   │   ├── seen-m2f.wav
    │   │   ├── seen-m2m.wav
    │   │   ├── unseen-f2f.wav
    │   │   ├── unseen-f2m.wav
    │   │   ├── unseen-m2f.wav
    │   │   └── unseen-m2m.wav
    │   ├── mel-apc
    │   │   ├── seen-f2f.wav
    │   │   ├── seen-f2m.wav
    │   │   ├── seen-m2f.wav
    │   │   ├── seen-m2m.wav
    │   │   ├── unseen-f2f.wav
    │   │   ├── unseen-f2m.wav
    │   │   ├── unseen-m2f.wav
    │   │   └── unseen-m2m.wav
    │   ├── mel-cpc
    │   │   ├── seen-f2f.wav
    │   │   ├── seen-f2m.wav
    │   │   ├── seen-m2f.wav
    │   │   ├── seen-m2m.wav
    │   │   ├── unseen-f2f.wav
    │   │   ├── unseen-f2m.wav
    │   │   ├── unseen-m2f.wav
    │   │   └── unseen-m2m.wav
    │   ├── mel-mel
    │   │   ├── seen-f2f.wav
    │   │   ├── seen-f2m.wav
    │   │   ├── seen-m2f.wav
    │   │   ├── seen-m2m.wav
    │   │   ├── unseen-f2f.wav
    │   │   ├── unseen-f2m.wav
    │   │   ├── unseen-m2f.wav
    │   │   └── unseen-m2m.wav
    │   ├── mel-ppg
    │   │   ├── seen-f2f.wav
    │   │   ├── seen-f2m.wav
    │   │   ├── seen-m2f.wav
    │   │   ├── seen-m2m.wav
    │   │   ├── unseen-f2f.wav
    │   │   ├── unseen-f2m.wav
    │   │   ├── unseen-m2f.wav
    │   │   └── unseen-m2m.wav
    │   ├── mel-w2v
    │   │   ├── seen-f2f.wav
    │   │   ├── seen-f2m.wav
    │   │   ├── seen-m2f.wav
    │   │   ├── seen-m2m.wav
    │   │   ├── unseen-f2f.wav
    │   │   ├── unseen-f2m.wav
    │   │   ├── unseen-m2f.wav
    │   │   └── unseen-m2m.wav
    │   ├── ppg-apc
    │   │   ├── seen-f2f.wav
    │   │   ├── seen-f2m.wav
    │   │   ├── seen-m2f.wav
    │   │   ├── seen-m2m.wav
    │   │   ├── unseen-f2f.wav
    │   │   ├── unseen-f2m.wav
    │   │   ├── unseen-m2f.wav
    │   │   └── unseen-m2m.wav
    │   ├── ppg-cpc
    │   │   ├── seen-f2f.wav
    │   │   ├── seen-f2m.wav
    │   │   ├── seen-m2f.wav
    │   │   ├── seen-m2m.wav
    │   │   ├── unseen-f2f.wav
    │   │   ├── unseen-f2m.wav
    │   │   ├── unseen-m2f.wav
    │   │   └── unseen-m2m.wav
    │   ├── ppg-mel
    │   │   ├── seen-f2f.wav
    │   │   ├── seen-f2m.wav
    │   │   ├── seen-m2f.wav
    │   │   ├── seen-m2m.wav
    │   │   ├── unseen-f2f.wav
    │   │   ├── unseen-f2m.wav
    │   │   ├── unseen-m2f.wav
    │   │   └── unseen-m2m.wav
    │   ├── ppg-ppg
    │   │   ├── seen-f2f.wav
    │   │   ├── seen-f2m.wav
    │   │   ├── seen-m2f.wav
    │   │   ├── seen-m2m.wav
    │   │   ├── unseen-f2f.wav
    │   │   ├── unseen-f2m.wav
    │   │   ├── unseen-m2f.wav
    │   │   └── unseen-m2m.wav
    │   ├── ppg-w2v
    │   │   ├── seen-f2f.wav
    │   │   ├── seen-f2m.wav
    │   │   ├── seen-m2f.wav
    │   │   ├── seen-m2m.wav
    │   │   ├── unseen-f2f.wav
    │   │   ├── unseen-f2m.wav
    │   │   ├── unseen-m2f.wav
    │   │   └── unseen-m2m.wav
    │   ├── w2v-apc
    │   │   ├── seen-f2f.wav
    │   │   ├── seen-f2m.wav
    │   │   ├── seen-m2f.wav
    │   │   ├── seen-m2m.wav
    │   │   ├── unseen-f2f.wav
    │   │   ├── unseen-f2m.wav
    │   │   ├── unseen-m2f.wav
    │   │   └── unseen-m2m.wav
    │   ├── w2v-cpc
    │   │   ├── seen-f2f.wav
    │   │   ├── seen-f2m.wav
    │   │   ├── seen-m2f.wav
    │   │   ├── seen-m2m.wav
    │   │   ├── unseen-f2f.wav
    │   │   ├── unseen-f2m.wav
    │   │   ├── unseen-m2f.wav
    │   │   └── unseen-m2m.wav
    │   ├── w2v-mel
    │   │   ├── seen-f2f.wav
    │   │   ├── seen-f2m.wav
    │   │   ├── seen-m2f.wav
    │   │   ├── seen-m2m.wav
    │   │   ├── unseen-f2f.wav
    │   │   ├── unseen-f2m.wav
    │   │   ├── unseen-m2f.wav
    │   │   └── unseen-m2m.wav
    │   ├── w2v-ppg
    │   │   ├── seen-f2f.wav
    │   │   ├── seen-f2m.wav
    │   │   ├── seen-m2f.wav
    │   │   ├── seen-m2m.wav
    │   │   ├── unseen-f2f.wav
    │   │   ├── unseen-f2m.wav
    │   │   ├── unseen-m2f.wav
    │   │   └── unseen-m2m.wav
    │   ├── w2v-w2v
    │   │   ├── seen-f2f.wav
    │   │   ├── seen-f2m.wav
    │   │   ├── seen-m2f.wav
    │   │   ├── seen-m2m.wav
    │   │   ├── unseen-f2f.wav
    │   │   ├── unseen-f2m.wav
    │   │   ├── unseen-m2f.wav
    │   │   └── unseen-m2m.wav
    │   └── Ground-truth
    │   │   ├── seen-f2f-source.wav
    │   │   ├── seen-f2f-target.wav
    │   │   ├── seen-f2m-source.wav
    │   │   ├── seen-f2m-target.wav
    │   │   ├── seen-m2f-source.wav
    │   │   ├── seen-m2f-target.wav
    │   │   ├── seen-m2m-source.wav
    │   │   ├── seen-m2m-target.wav
    │   │   ├── unseen-f2f-source.wav
    │   │   ├── unseen-f2f-target.wav
    │   │   ├── unseen-f2m-source.wav
    │   │   ├── unseen-f2m-target.wav
    │   │   ├── unseen-m2f-source.wav
    │   │   ├── unseen-m2f-target.wav
    │   │   ├── unseen-m2m-source.wav
    │   │   └── unseen-m2m-target.wav
    ├── styles.css
    └── index.html
├── data
    ├── __init__.py
    ├── preprocess_dataset.py
    ├── feature_extract.py
    ├── utils.py
    └── intra_speaker_dataset.py
├── requirements.txt
├── merger.py
├── preprocess.py
├── info.yaml
├── convert_batch.py
├── README.md
└── train.py


/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .model import S2VC
2 | from .utils import *
3 | 


--------------------------------------------------------------------------------
/docs/imgs/model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/imgs/model.png


--------------------------------------------------------------------------------
/docs/favicons/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/favicons/favicon.ico


--------------------------------------------------------------------------------
/docs/favicons/apple-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/favicons/apple-icon.png


--------------------------------------------------------------------------------
/docs/favicons/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/favicons/favicon-16x16.png


--------------------------------------------------------------------------------
/docs/favicons/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/favicons/favicon-32x32.png


--------------------------------------------------------------------------------
/docs/favicons/favicon-96x96.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/favicons/favicon-96x96.png


--------------------------------------------------------------------------------
/docs/favicons/ms-icon-70x70.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/favicons/ms-icon-70x70.png


--------------------------------------------------------------------------------
/docs/wavs/apc-apc/seen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-apc/seen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-apc/seen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-apc/seen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-apc/seen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-apc/seen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-apc/seen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-apc/seen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-cpc/seen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-cpc/seen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-cpc/seen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-cpc/seen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-cpc/seen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-cpc/seen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-cpc/seen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-cpc/seen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-mel/seen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-mel/seen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-mel/seen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-mel/seen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-mel/seen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-mel/seen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-mel/seen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-mel/seen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-ppg/seen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-ppg/seen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-ppg/seen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-ppg/seen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-ppg/seen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-ppg/seen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-ppg/seen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-ppg/seen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-w2v/seen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-w2v/seen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-w2v/seen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-w2v/seen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-w2v/seen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-w2v/seen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-w2v/seen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-w2v/seen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-apc/seen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-apc/seen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-apc/seen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-apc/seen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-apc/seen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-apc/seen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-apc/seen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-apc/seen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-cpc/seen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-cpc/seen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-cpc/seen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-cpc/seen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-cpc/seen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-cpc/seen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-cpc/seen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-cpc/seen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-mel/seen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-mel/seen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-mel/seen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-mel/seen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-mel/seen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-mel/seen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-mel/seen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-mel/seen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-ppg/seen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-ppg/seen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-ppg/seen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-ppg/seen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-ppg/seen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-ppg/seen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-ppg/seen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-ppg/seen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-w2v/seen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-w2v/seen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-w2v/seen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-w2v/seen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-w2v/seen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-w2v/seen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-w2v/seen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-w2v/seen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-apc/seen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-apc/seen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-apc/seen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-apc/seen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-apc/seen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-apc/seen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-apc/seen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-apc/seen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-cpc/seen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-cpc/seen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-cpc/seen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-cpc/seen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-cpc/seen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-cpc/seen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-cpc/seen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-cpc/seen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-mel/seen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-mel/seen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-mel/seen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-mel/seen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-mel/seen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-mel/seen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-mel/seen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-mel/seen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-ppg/seen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-ppg/seen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-ppg/seen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-ppg/seen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-ppg/seen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-ppg/seen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-ppg/seen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-ppg/seen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-w2v/seen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-w2v/seen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-w2v/seen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-w2v/seen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-w2v/seen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-w2v/seen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-w2v/seen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-w2v/seen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-apc/seen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-apc/seen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-apc/seen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-apc/seen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-apc/seen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-apc/seen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-apc/seen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-apc/seen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-cpc/seen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-cpc/seen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-cpc/seen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-cpc/seen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-cpc/seen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-cpc/seen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-cpc/seen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-cpc/seen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-mel/seen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-mel/seen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-mel/seen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-mel/seen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-mel/seen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-mel/seen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-mel/seen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-mel/seen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-ppg/seen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-ppg/seen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-ppg/seen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-ppg/seen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-ppg/seen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-ppg/seen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-ppg/seen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-ppg/seen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-w2v/seen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-w2v/seen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-w2v/seen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-w2v/seen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-w2v/seen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-w2v/seen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-w2v/seen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-w2v/seen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-apc/seen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-apc/seen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-apc/seen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-apc/seen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-apc/seen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-apc/seen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-apc/seen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-apc/seen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-cpc/seen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-cpc/seen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-cpc/seen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-cpc/seen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-cpc/seen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-cpc/seen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-cpc/seen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-cpc/seen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-mel/seen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-mel/seen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-mel/seen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-mel/seen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-mel/seen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-mel/seen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-mel/seen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-mel/seen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-ppg/seen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-ppg/seen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-ppg/seen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-ppg/seen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-ppg/seen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-ppg/seen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-ppg/seen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-ppg/seen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-w2v/seen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-w2v/seen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-w2v/seen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-w2v/seen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-w2v/seen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-w2v/seen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-w2v/seen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-w2v/seen-m2m.wav


--------------------------------------------------------------------------------
/docs/favicons/ms-icon-144x144.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/favicons/ms-icon-144x144.png


--------------------------------------------------------------------------------
/docs/favicons/ms-icon-150x150.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/favicons/ms-icon-150x150.png


--------------------------------------------------------------------------------
/docs/favicons/ms-icon-310x310.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/favicons/ms-icon-310x310.png


--------------------------------------------------------------------------------
/docs/wavs/apc-apc/unseen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-apc/unseen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-apc/unseen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-apc/unseen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-apc/unseen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-apc/unseen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-apc/unseen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-apc/unseen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-cpc/unseen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-cpc/unseen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-cpc/unseen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-cpc/unseen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-cpc/unseen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-cpc/unseen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-cpc/unseen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-cpc/unseen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-mel/unseen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-mel/unseen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-mel/unseen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-mel/unseen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-mel/unseen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-mel/unseen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-mel/unseen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-mel/unseen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-ppg/unseen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-ppg/unseen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-ppg/unseen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-ppg/unseen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-ppg/unseen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-ppg/unseen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-ppg/unseen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-ppg/unseen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-w2v/unseen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-w2v/unseen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-w2v/unseen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-w2v/unseen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-w2v/unseen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-w2v/unseen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/apc-w2v/unseen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/apc-w2v/unseen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-apc/unseen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-apc/unseen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-apc/unseen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-apc/unseen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-apc/unseen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-apc/unseen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-apc/unseen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-apc/unseen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-cpc/unseen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-cpc/unseen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-cpc/unseen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-cpc/unseen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-cpc/unseen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-cpc/unseen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-cpc/unseen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-cpc/unseen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-mel/unseen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-mel/unseen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-mel/unseen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-mel/unseen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-mel/unseen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-mel/unseen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-mel/unseen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-mel/unseen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-ppg/unseen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-ppg/unseen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-ppg/unseen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-ppg/unseen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-ppg/unseen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-ppg/unseen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-ppg/unseen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-ppg/unseen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-w2v/unseen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-w2v/unseen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-w2v/unseen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-w2v/unseen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-w2v/unseen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-w2v/unseen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/cpc-w2v/unseen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/cpc-w2v/unseen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-apc/unseen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-apc/unseen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-apc/unseen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-apc/unseen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-apc/unseen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-apc/unseen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-apc/unseen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-apc/unseen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-cpc/unseen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-cpc/unseen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-cpc/unseen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-cpc/unseen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-cpc/unseen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-cpc/unseen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-cpc/unseen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-cpc/unseen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-mel/unseen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-mel/unseen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-mel/unseen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-mel/unseen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-mel/unseen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-mel/unseen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-mel/unseen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-mel/unseen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-ppg/unseen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-ppg/unseen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-ppg/unseen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-ppg/unseen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-ppg/unseen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-ppg/unseen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-ppg/unseen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-ppg/unseen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-w2v/unseen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-w2v/unseen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-w2v/unseen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-w2v/unseen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-w2v/unseen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-w2v/unseen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/mel-w2v/unseen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/mel-w2v/unseen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-apc/unseen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-apc/unseen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-apc/unseen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-apc/unseen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-apc/unseen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-apc/unseen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-apc/unseen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-apc/unseen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-cpc/unseen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-cpc/unseen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-cpc/unseen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-cpc/unseen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-cpc/unseen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-cpc/unseen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-cpc/unseen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-cpc/unseen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-mel/unseen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-mel/unseen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-mel/unseen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-mel/unseen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-mel/unseen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-mel/unseen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-mel/unseen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-mel/unseen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-ppg/unseen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-ppg/unseen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-ppg/unseen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-ppg/unseen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-ppg/unseen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-ppg/unseen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-ppg/unseen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-ppg/unseen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-w2v/unseen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-w2v/unseen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-w2v/unseen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-w2v/unseen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-w2v/unseen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-w2v/unseen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/ppg-w2v/unseen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/ppg-w2v/unseen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-apc/unseen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-apc/unseen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-apc/unseen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-apc/unseen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-apc/unseen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-apc/unseen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-apc/unseen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-apc/unseen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-cpc/unseen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-cpc/unseen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-cpc/unseen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-cpc/unseen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-cpc/unseen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-cpc/unseen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-cpc/unseen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-cpc/unseen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-mel/unseen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-mel/unseen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-mel/unseen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-mel/unseen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-mel/unseen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-mel/unseen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-mel/unseen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-mel/unseen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-ppg/unseen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-ppg/unseen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-ppg/unseen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-ppg/unseen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-ppg/unseen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-ppg/unseen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-ppg/unseen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-ppg/unseen-m2m.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-w2v/unseen-f2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-w2v/unseen-f2f.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-w2v/unseen-f2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-w2v/unseen-f2m.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-w2v/unseen-m2f.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-w2v/unseen-m2f.wav


--------------------------------------------------------------------------------
/docs/wavs/w2v-w2v/unseen-m2m.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/w2v-w2v/unseen-m2m.wav


--------------------------------------------------------------------------------
/docs/favicons/android-icon-36x36.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/favicons/android-icon-36x36.png


--------------------------------------------------------------------------------
/docs/favicons/android-icon-48x48.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/favicons/android-icon-48x48.png


--------------------------------------------------------------------------------
/docs/favicons/android-icon-72x72.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/favicons/android-icon-72x72.png


--------------------------------------------------------------------------------
/docs/favicons/android-icon-96x96.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/favicons/android-icon-96x96.png


--------------------------------------------------------------------------------
/docs/favicons/apple-icon-114x114.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/favicons/apple-icon-114x114.png


--------------------------------------------------------------------------------
/docs/favicons/apple-icon-120x120.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/favicons/apple-icon-120x120.png


--------------------------------------------------------------------------------
/docs/favicons/apple-icon-144x144.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/favicons/apple-icon-144x144.png


--------------------------------------------------------------------------------
/docs/favicons/apple-icon-152x152.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/favicons/apple-icon-152x152.png


--------------------------------------------------------------------------------
/docs/favicons/apple-icon-180x180.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/favicons/apple-icon-180x180.png


--------------------------------------------------------------------------------
/docs/favicons/apple-icon-57x57.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/favicons/apple-icon-57x57.png


--------------------------------------------------------------------------------
/docs/favicons/apple-icon-60x60.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/favicons/apple-icon-60x60.png


--------------------------------------------------------------------------------
/docs/favicons/apple-icon-72x72.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/favicons/apple-icon-72x72.png


--------------------------------------------------------------------------------
/docs/favicons/apple-icon-76x76.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/favicons/apple-icon-76x76.png


--------------------------------------------------------------------------------
/docs/favicons/android-icon-144x144.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/favicons/android-icon-144x144.png


--------------------------------------------------------------------------------
/docs/favicons/android-icon-192x192.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/favicons/android-icon-192x192.png


--------------------------------------------------------------------------------
/docs/favicons/apple-icon-precomposed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/favicons/apple-icon-precomposed.png


--------------------------------------------------------------------------------
/docs/wavs/Ground-truth/seen-f2f-source.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/Ground-truth/seen-f2f-source.wav


--------------------------------------------------------------------------------
/docs/wavs/Ground-truth/seen-f2f-target.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/Ground-truth/seen-f2f-target.wav


--------------------------------------------------------------------------------
/docs/wavs/Ground-truth/seen-f2m-source.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/Ground-truth/seen-f2m-source.wav


--------------------------------------------------------------------------------
/docs/wavs/Ground-truth/seen-f2m-target.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/Ground-truth/seen-f2m-target.wav


--------------------------------------------------------------------------------
/docs/wavs/Ground-truth/seen-m2f-source.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/Ground-truth/seen-m2f-source.wav


--------------------------------------------------------------------------------
/docs/wavs/Ground-truth/seen-m2f-target.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/Ground-truth/seen-m2f-target.wav


--------------------------------------------------------------------------------
/docs/wavs/Ground-truth/seen-m2m-source.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/Ground-truth/seen-m2m-source.wav


--------------------------------------------------------------------------------
/docs/wavs/Ground-truth/seen-m2m-target.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/Ground-truth/seen-m2m-target.wav


--------------------------------------------------------------------------------
/docs/wavs/Ground-truth/unseen-f2f-source.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/Ground-truth/unseen-f2f-source.wav


--------------------------------------------------------------------------------
/docs/wavs/Ground-truth/unseen-f2f-target.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/Ground-truth/unseen-f2f-target.wav


--------------------------------------------------------------------------------
/docs/wavs/Ground-truth/unseen-f2m-source.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/Ground-truth/unseen-f2m-source.wav


--------------------------------------------------------------------------------
/docs/wavs/Ground-truth/unseen-f2m-target.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/Ground-truth/unseen-f2m-target.wav


--------------------------------------------------------------------------------
/docs/wavs/Ground-truth/unseen-m2f-source.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/Ground-truth/unseen-m2f-source.wav


--------------------------------------------------------------------------------
/docs/wavs/Ground-truth/unseen-m2f-target.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/Ground-truth/unseen-m2f-target.wav


--------------------------------------------------------------------------------
/docs/wavs/Ground-truth/unseen-m2m-source.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/Ground-truth/unseen-m2m-source.wav


--------------------------------------------------------------------------------
/docs/wavs/Ground-truth/unseen-m2m-target.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/howard1337/S2VC/HEAD/docs/wavs/Ground-truth/unseen-m2m-target.wav


--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
1 | from .preprocess_dataset import PreprocessDataset
2 | from .intra_speaker_dataset import IntraSpeakerDataset, collate_batch
3 | from .utils import *
4 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | sox
 2 | gdown
 3 | tqdm
 4 | librosa
 5 | soundfile
 6 | torch==1.7.1
 7 | torchaudio==0.7.2
 8 | argparse
 9 | matplotlib
10 | tensorboard
11 | -e git://github.com/pytorch/fairseq.git@1a709b2a401ac8bd6d805c8a6a5f4d7f03b923ff#egg=fairseq
12 | 


--------------------------------------------------------------------------------
/docs/favicons/browserconfig.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <browserconfig><msapplication><tile><square70x70logo src="/ms-icon-70x70.png"/><square150x150logo src="/ms-icon-150x150.png"/><square310x310logo src="/ms-icon-310x310.png"/><TileColor>#ffffff</TileColor></tile></msapplication></browserconfig>


--------------------------------------------------------------------------------
/merger.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | import json
 4 | 
 5 | """Merge the metadata.json of different features"""
 6 | 
 7 | dataset_dir = sys.argv[1]
 8 | sub_dirs = [i for i in os.listdir(sys.argv[1]) if 'json' not in i]
 9 | 
10 | 
11 | 
12 | metas = []
13 | merged = {}
14 | 
15 | for sub_dir in sub_dirs:
16 |     metas.append(json.load(open(os.path.join(dataset_dir, sub_dir, 'metadata.json'))))
17 | 
18 | for key in metas[0].keys():
19 |     if key == 'feature_name':
20 |         continue
21 |     merged[key] = [{} for i in range(len(metas[0][key]))]
22 |     for subdir, meta in zip(sub_dirs, metas):
23 |         for idx, value in enumerate(meta[key]):
24 |             merged[key][idx]['audio_path'] = value['audio_path']
25 |             merged[key][idx][meta['feature_name']] = os.path.join(subdir, value['feature_path'])
26 | 
27 | json.dump(merged, open(os.path.join(dataset_dir, 'metadata.json'), 'w'), indent=2)
28 | 


--------------------------------------------------------------------------------
/docs/favicons/manifest.json:
--------------------------------------------------------------------------------
 1 | {
 2 |  "name": "App",
 3 |  "icons": [
 4 |   {
 5 |    "src": "\/android-icon-36x36.png",
 6 |    "sizes": "36x36",
 7 |    "type": "image\/png",
 8 |    "density": "0.75"
 9 |   },
10 |   {
11 |    "src": "\/android-icon-48x48.png",
12 |    "sizes": "48x48",
13 |    "type": "image\/png",
14 |    "density": "1.0"
15 |   },
16 |   {
17 |    "src": "\/android-icon-72x72.png",
18 |    "sizes": "72x72",
19 |    "type": "image\/png",
20 |    "density": "1.5"
21 |   },
22 |   {
23 |    "src": "\/android-icon-96x96.png",
24 |    "sizes": "96x96",
25 |    "type": "image\/png",
26 |    "density": "2.0"
27 |   },
28 |   {
29 |    "src": "\/android-icon-144x144.png",
30 |    "sizes": "144x144",
31 |    "type": "image\/png",
32 |    "density": "3.0"
33 |   },
34 |   {
35 |    "src": "\/android-icon-192x192.png",
36 |    "sizes": "192x192",
37 |    "type": "image\/png",
38 |    "density": "4.0"
39 |   }
40 |  ]
41 | }


--------------------------------------------------------------------------------
/data/preprocess_dataset.py:
--------------------------------------------------------------------------------
 1 | """Precompute Wav2Vec features and spectrograms."""
 2 | 
 3 | from copy import deepcopy
 4 | from pathlib import Path
 5 | 
 6 | import torch
 7 | from librosa.util import find_files
 8 | 
 9 | import sox
10 | 
11 | from .utils import load_wav, log_mel_spectrogram
12 | class PreprocessDataset(torch.utils.data.Dataset):
13 |     """Prefetch audio data for preprocessing."""
14 | 
15 |     def __init__(
16 |         self,
17 |         data_dirs,
18 |         trim_method,
19 |         sample_rate,
20 |     ):
21 | 
22 |         data = []
23 | 
24 |         for data_dir in data_dirs:
25 |             data_dir_path = Path(data_dir)
26 |             speaker_dirs = [x for x in data_dir_path.iterdir() if x.is_dir()]
27 | 
28 |             for speaker_dir in speaker_dirs:
29 |                 audio_paths = find_files(speaker_dir)
30 |                 if len(audio_paths) == 0:
31 |                     continue
32 | 
33 |                 speaker_name = speaker_dir.name
34 |                 for audio_path in audio_paths:
35 |                     data.append((speaker_name, audio_path))
36 | 
37 |         self.trim_method = trim_method
38 |         self.sample_rate = sample_rate
39 |         self.data = data
40 | 
41 |         if trim_method == "vad":
42 |             tfm = sox.Transformer()
43 |             tfm.vad(location=1)
44 |             tfm.vad(location=-1)
45 |             self.sox_transform = tfm
46 | 
47 |     def __len__(self):
48 |         return len(self.data)
49 | 
50 |     def __getitem__(self, index):
51 |         speaker_name, audio_path = self.data[index]
52 | 
53 |         if self.trim_method == "librosa":
54 |             wav = load_wav(audio_path, self.sample_rate, trim=True)
55 |         elif self.trim_method == "vad":
56 |             wav = load_wav(audio_path, self.sample_rate)
57 |             trim_wav = self.sox_transform.build_array(
58 |                 input_array=wav, sample_rate_in=self.sample_rate
59 |             )
60 |             wav = deepcopy(trim_wav if len(trim_wav) > 10 else wav)
61 |         return speaker_name, audio_path, torch.FloatTensor(wav)
62 | 


--------------------------------------------------------------------------------
/models/utils.py:
--------------------------------------------------------------------------------
 1 | """Useful utilities."""
 2 | 
 3 | import math
 4 | 
 5 | import torch
 6 | from torch.optim import Optimizer
 7 | from torch.optim.lr_scheduler import LambdaLR
 8 | 
 9 | from fairseq.models.wav2vec import Wav2Vec2Model
10 | 
11 | 
12 | def load_pretrained_wav2vec(ckpt_path):
13 |     """Load pretrained Wav2Vec model."""
14 |     ckpt = torch.load(ckpt_path)
15 |     model = Wav2Vec2Model.build_model(ckpt["args"], task=None)
16 |     model.load_state_dict(ckpt["model"])
17 |     model.remove_pretraining_modules()
18 |     model.eval()
19 |     return model
20 | 
21 | 
22 | def get_cosine_schedule_with_warmup(
23 |     optimizer: Optimizer,
24 |     num_warmup_steps: int,
25 |     num_training_steps: int,
26 |     num_cycles: float = 0.5,
27 |     last_epoch: int = -1,
28 | ):
29 |     """
30 |     Create a schedule with a learning rate that decreases following the values of the cosine function between the
31 |     initial lr set in the optimizer to 0, after a warmup period during which it increases linearly between 0 and the
32 |     initial lr set in the optimizer.
33 | 
34 |     Args:
35 |         optimizer (:class:`~torch.optim.Optimizer`):
36 |             The optimizer for which to schedule the learning rate.
37 |         num_warmup_steps (:obj:`int`):
38 |             The number of steps for the warmup phase.
39 |         num_training_steps (:obj:`int`):
40 |             The total number of training steps.
41 |         num_cycles (:obj:`float`, `optional`, defaults to 0.5):
42 |             The number of waves in the cosine schedule (the defaults is to just decrease from the max value to 0
43 |             following a half-cosine).
44 |         last_epoch (:obj:`int`, `optional`, defaults to -1):
45 |             The index of the last epoch when resuming training.
46 | 
47 |     Return:
48 |         :obj:`torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
49 |     """
50 | 
51 |     def lr_lambda(current_step):
52 |         if current_step < num_warmup_steps:
53 |             return float(current_step) / float(max(1, num_warmup_steps))
54 |         progress = float(current_step - num_warmup_steps) / float(
55 |             max(1, num_training_steps - num_warmup_steps)
56 |         )
57 |         return max(
58 |             0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress))
59 |         )
60 | 
61 |     return LambdaLR(optimizer, lr_lambda, last_epoch)
62 | 


--------------------------------------------------------------------------------
/data/feature_extract.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from functools import partial
 3 | from multiprocessing import Pool, cpu_count
 4 | from models import load_pretrained_wav2vec
 5 | from data import log_mel_spectrogram
 6 | 
 7 | 
 8 | class FeatureExtractor:
 9 |     def __init__(self, feature_name, wav2vec2_path=None, device=None):
10 |         self.device = device
11 |         if (
12 |             feature_name == "apc"
13 |             or feature_name == "cpc"
14 |             or feature_name == "timit_posteriorgram"
15 |             or feature_name == "fbank"
16 |         ):
17 |             self.extractor = (
18 |                 torch.hub.load("s3prl/s3prl:f2114342ff9e813e18a580fa41418aee9925414e", feature_name, refresh=True).eval().to(device)
19 |             )
20 |             self.mode = 1
21 |         elif feature_name == "wav2vec2":
22 |             self.extractor = load_pretrained_wav2vec(wav2vec2_path).eval().to(device)
23 |             self.mode = 2
24 |         elif feature_name == "wav2vec2_mel":
25 |             self.extractor = partial(
26 |                 log_mel_spectrogram,
27 |                 preemph=0.97,
28 |                 sample_rate=16000,
29 |                 n_mels=80,
30 |                 n_fft=400,
31 |                 hop_length=320,
32 |                 win_length=400,
33 |                 f_min=0,
34 |                 center=False,
35 |             )
36 |             self.mode = 3
37 |         elif feature_name == "cpc_mel":
38 |             self.extractor = partial(
39 |                 log_mel_spectrogram,
40 |                 preemph=0.97,
41 |                 sample_rate=16000,
42 |                 n_mels=80,
43 |                 n_fft=465,
44 |                 hop_length=160,
45 |                 win_length=465,
46 |                 f_min=80,
47 |                 center=True,
48 |             )
49 |             self.mode = 3
50 |         else:
51 |             print(feature_name)
52 |             print(
53 |                 "Please use timit_posteriorgram, apc, wav2vec2, cpc, wav2vec2_mel, cpc_mel, or fbank"
54 |             )
55 |             exit()
56 | 
57 |     def get_feature(self, wavs):
58 |         if self.mode == 1:
59 |             return self.extractor(wavs)
60 |         elif self.mode == 2:
61 |             feats = []
62 |             for wav in wavs:
63 |                 feat = self.extractor.extract_features(wav.unsqueeze(0), None)[0].squeeze(0)
64 |                 feats.append(feat)
65 |         elif self.mode == 3:
66 |             wavs = [wav.cpu().numpy() for wav in wavs]
67 |             feats = [self.extractor(wav) for wav in wavs]
68 |             feats = [torch.FloatTensor(feat).to(self.device) for feat in feats]
69 |             return feats
70 | 
71 |         return feats
72 | 


--------------------------------------------------------------------------------
/preprocess.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | """Precompute Wav2Vec features."""
  3 | 
  4 | import os
  5 | import json
  6 | from pathlib import Path
  7 | from tempfile import mkstemp
  8 | from multiprocessing import cpu_count
  9 | 
 10 | import tqdm
 11 | import torch
 12 | from torch.utils.data import DataLoader
 13 | from argparse import ArgumentParser
 14 | from copy import deepcopy
 15 | 
 16 | from models import load_pretrained_wav2vec
 17 | from data import PreprocessDataset
 18 | from data.feature_extract import FeatureExtractor
 19 | 
 20 | def parse_args():
 21 |     """Parse command-line arguments."""
 22 |     parser = ArgumentParser()
 23 |     parser.add_argument("data_dirs", type=str, nargs="+")
 24 |     parser.add_argument("feature_name", type=str)
 25 |     parser.add_argument("wav2vec_path", type=str)
 26 |     parser.add_argument("out_dir", type=str)
 27 |     parser.add_argument("--trim_method", choices=["librosa", "vad"], default="vad")
 28 |     parser.add_argument("--n_workers", type=int, default=cpu_count())
 29 | 
 30 |     parser.add_argument("--sample_rate", type=int, default=16000)
 31 | 
 32 |     return vars(parser.parse_args())
 33 | 
 34 | 
 35 | def main(
 36 |     data_dirs,
 37 |     feature_name,
 38 |     wav2vec_path,
 39 |     out_dir,
 40 |     trim_method,
 41 |     n_workers,
 42 |     sample_rate,
 43 |     **kwargs,
 44 | ):
 45 |     """Main function."""
 46 | 
 47 |     out_dir_path = Path(out_dir)
 48 | 
 49 |     if out_dir_path.exists():
 50 |         assert out_dir_path.is_dir()
 51 |     else:
 52 |         out_dir_path.mkdir(parents=True)
 53 | 
 54 |     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 55 | 
 56 |     dataset = PreprocessDataset(
 57 |         data_dirs,
 58 |         trim_method,
 59 |         sample_rate
 60 |     )
 61 |     dataloader = DataLoader(
 62 |         dataset, batch_size=1, shuffle=False, drop_last=False, num_workers=n_workers
 63 |     )
 64 | 
 65 | 
 66 |     speaker_infos = {}
 67 |     speaker_infos['feature_name'] = feature_name
 68 | 
 69 |     pbar = tqdm.tqdm(total=len(dataset), ncols=0)
 70 |     mapping = {'apc': 'fbank', 'timit_posteriorgram': 'fbank', 'cpc': 'cpc_mel', 'wav2vec2': 'wav2vec2_mel'}
 71 |     feat_extractor = FeatureExtractor(feature_name, wav2vec_path, device)
 72 |     mel_extractor = FeatureExtractor(mapping[feature_name], wav2vec_path, device)
 73 |     for speaker_name, audio_path, wav in dataloader:
 74 |         if wav.size(-1) < 10:
 75 |             continue
 76 | 
 77 |         wav = wav.to(device)
 78 |         speaker_name = speaker_name[0]
 79 |         audio_path = audio_path[0]
 80 | 
 81 |         
 82 |         with torch.no_grad():
 83 |             feat = feat_extractor.get_feature(wav)[0]
 84 |             mel = mel_extractor.get_feature(wav)[0]
 85 |         fd, temp_file = mkstemp(suffix=".tar", prefix="utterance-", dir=out_dir_path)
 86 |         torch.save({"feat": feat.detach().cpu(), "mel": mel.detach().cpu()}, temp_file)
 87 |         os.close(fd)
 88 | 
 89 |         if speaker_name not in speaker_infos.keys():
 90 |             speaker_infos[speaker_name] = []
 91 | 
 92 |         speaker_infos[speaker_name].append(
 93 |             {
 94 |                 "feature_path": Path(temp_file).name,
 95 |                 "audio_path": audio_path,
 96 |                 "mel_len": len(mel),
 97 |             }
 98 |         )
 99 | 
100 |         pbar.update(dataloader.batch_size)
101 | 
102 |     with open(out_dir_path / "metadata.json", "w") as f:
103 |         json.dump(speaker_infos, f, indent=2)
104 | 
105 | 
106 | if __name__ == "__main__":
107 |     main(**parse_args())
108 | 


--------------------------------------------------------------------------------
/info.yaml:
--------------------------------------------------------------------------------
 1 | libri121 -> libri260:
 2 |   source: /home/storage/Dataset/LibriTTS/test-clean/121/121726/121_121726_000025_000001.wav
 3 |   target:
 4 |   - /home/storage/Dataset/LibriTTS/test-clean/260/123288/260_123288_000003_000001.wav
 5 |   - /home/storage/Dataset/LibriTTS/test-clean/260/123288/260_123288_000004_000001.wav
 6 |   - /home/storage/Dataset/LibriTTS/test-clean/260/123288/260_123288_000005_000000.wav
 7 |   - /home/storage/Dataset/LibriTTS/test-clean/260/123288/260_123288_000006_000000.wav
 8 | libri121 -> p227:
 9 |   source: /home/storage/Dataset/LibriTTS/test-clean/121/121726/121_121726_000025_000001.wav
10 |   target:
11 |   - /home/storage/Dataset/Survey/VCTK/wav48/p227/p227_005.wav
12 |   - /home/storage/Dataset/Survey/VCTK/wav48/p227/p227_008.wav
13 |   - /home/storage/Dataset/Survey/VCTK/wav48/p227/p227_011.wav
14 |   - /home/storage/Dataset/Survey/VCTK/wav48/p227/p227_021.wav
15 | libri260 -> libri121:
16 |   source: /home/storage/Dataset/LibriTTS/test-clean/260/123288/260_123288_000005_000000.wav
17 |   target:
18 |   - /home/storage/Dataset/LibriTTS/test-clean/121/121726/121_121726_000004_000003.wav
19 |   - /home/storage/Dataset/LibriTTS/test-clean/121/121726/121_121726_000005_000001.wav
20 |   - /home/storage/Dataset/LibriTTS/test-clean/121/121726/121_121726_000025_000000.wav
21 |   - /home/storage/Dataset/LibriTTS/test-clean/121/121726/121_121726_000025_000001.wav
22 | libri260 -> p225:
23 |   source: /home/storage/Dataset/LibriTTS/test-clean/260/123288/260_123288_000005_000000.wav
24 |   target:
25 |   - /home/storage/Dataset/Survey/VCTK/wav48/p225/p225_005.wav
26 |   - /home/storage/Dataset/Survey/VCTK/wav48/p225/p225_008.wav
27 |   - /home/storage/Dataset/Survey/VCTK/wav48/p225/p225_011.wav
28 |   - /home/storage/Dataset/Survey/VCTK/wav48/p225/p225_021.wav
29 | p225 -> libri260:
30 |   source: /home/storage/Dataset/Survey/VCTK/wav48/p225/p225_006.wav
31 |   target:
32 |   - /home/storage/Dataset/LibriTTS/test-clean/260/123288/260_123288_000003_000001.wav
33 |   - /home/storage/Dataset/LibriTTS/test-clean/260/123288/260_123288_000004_000001.wav
34 |   - /home/storage/Dataset/LibriTTS/test-clean/260/123288/260_123288_000005_000000.wav
35 |   - /home/storage/Dataset/LibriTTS/test-clean/260/123288/260_123288_000006_000000.wav
36 | p225 -> p227:
37 |   source: /home/storage/Dataset/Survey/VCTK/wav48/p225/p225_006.wav
38 |   target:
39 |   - /home/storage/Dataset/Survey/VCTK/wav48/p227/p227_005.wav
40 |   - /home/storage/Dataset/Survey/VCTK/wav48/p227/p227_008.wav
41 |   - /home/storage/Dataset/Survey/VCTK/wav48/p227/p227_011.wav
42 |   - /home/storage/Dataset/Survey/VCTK/wav48/p227/p227_021.wav
43 | p227 -> libri121:
44 |   source: /home/storage/Dataset/Survey/VCTK/wav48/p227/p227_020.wav
45 |   target:
46 |   - /home/storage/Dataset/LibriTTS/test-clean/121/121726/121_121726_000004_000003.wav
47 |   - /home/storage/Dataset/LibriTTS/test-clean/121/121726/121_121726_000005_000001.wav
48 |   - /home/storage/Dataset/LibriTTS/test-clean/121/121726/121_121726_000025_000000.wav
49 |   - /home/storage/Dataset/LibriTTS/test-clean/121/121726/121_121726_000025_000001.wav
50 | p227 -> p225:
51 |   source: /home/storage/Dataset/Survey/VCTK/wav48/p227/p227_020.wav
52 |   target:
53 |   - /home/storage/Dataset/Survey/VCTK/wav48/p225/p225_005.wav
54 |   - /home/storage/Dataset/Survey/VCTK/wav48/p225/p225_008.wav
55 |   - /home/storage/Dataset/Survey/VCTK/wav48/p225/p225_011.wav
56 |   - /home/storage/Dataset/Survey/VCTK/wav48/p225/p225_021.wav
57 | p228 -> p232:
58 |   source: /home/storage/Dataset/Survey/VCTK/wav48/p228/p228_004.wav
59 |   target:
60 |   - /home/storage/Dataset/Survey/VCTK/wav48/p232/p232_005.wav
61 |   - /home/storage/Dataset/Survey/VCTK/wav48/p232/p232_008.wav
62 |   - /home/storage/Dataset/Survey/VCTK/wav48/p232/p232_011.wav
63 |   - /home/storage/Dataset/Survey/VCTK/wav48/p232/p232_021.wav
64 | p232 -> p228:
65 |   source: /home/storage/Dataset/Survey/VCTK/wav48/p232/p232_016.wav
66 |   target:
67 |   - /home/storage/Dataset/Survey/VCTK/wav48/p228/p228_005.wav
68 |   - /home/storage/Dataset/Survey/VCTK/wav48/p228/p228_008.wav
69 |   - /home/storage/Dataset/Survey/VCTK/wav48/p228/p228_011.wav
70 |   - /home/storage/Dataset/Survey/VCTK/wav48/p228/p228_021.wav
71 | 


--------------------------------------------------------------------------------
/data/utils.py:
--------------------------------------------------------------------------------
  1 | """Utilities for data manipulation."""
  2 | 
  3 | from typing import Union
  4 | from pathlib import Path
  5 | 
  6 | import librosa
  7 | from librosa.effects import pitch_shift
  8 | 
  9 | import numpy as np
 10 | import matplotlib
 11 | from matplotlib import pyplot as plt
 12 | from scipy.signal import lfilter
 13 | 
 14 | matplotlib.use("Agg")
 15 | 
 16 | 
 17 | def trim_func(wav, sample_rate):
 18 |     _, (start_frame, end_frame) = librosa.effects.trim(
 19 |         wav, top_db=25, frame_length=512, hop_length=128
 20 |     )
 21 |     start_frame = max(0, start_frame - 0.1 * sample_rate)
 22 |     end_frame = min(len(wav), end_frame + 0.1 * sample_rate)
 23 | 
 24 |     start = int(start_frame)
 25 |     end = int(end_frame)
 26 |     if end - start > 1000:  # prevent empty slice
 27 |         wav = wav[start:end]
 28 |     return wav
 29 | 
 30 | 
 31 | def load_wav(
 32 |     audio_path: Union[str, Path],
 33 |     sample_rate: int,
 34 |     trim: bool = False,
 35 |     shift: int = None,
 36 | ) -> np.ndarray:
 37 |     """Load and preprocess waveform."""
 38 |     wav = librosa.load(audio_path, sr=sample_rate)[0]
 39 | 
 40 |     shifted_wavs = None
 41 |     #if shift is not None:
 42 |     #    shifted_wavs = []
 43 |     #    for i in [--9, -6, -3, 3, 6, 9]:
 44 |     #        shifted_wav = pitch_shift(wav, sample_rate, i)
 45 |     #        shifted_wav = shifted_wav / (np.abs(shifted_wav).max() + 1e-6)
 46 |     #        shifted_wavs.append(shifted_wav)
 47 | 
 48 |     wav = wav / (np.abs(wav).max() + 1e-6)
 49 |     if trim:
 50 |         wav = trim_func(wav, sample_rate)
 51 |         # min_length = 1e100
 52 |         # for i in range(len(shifted_wavs)):
 53 |         #     shifted_wavs[i] = trim_func(shifted_wavs[i], sample_rate)
 54 |         #     min_length = min(len(shifted_wavs[i]), min_length)
 55 |         # for i in range(len(shifted_wavs)):
 56 |         #     shifted_wavs[i] = shifted_wavs[i][:min_length]
 57 | 
 58 |     if shift is not None:
 59 |         return wav, shifted_wavs
 60 |     return wav
 61 | 
 62 | 
 63 | def log_mel_spectrogram(
 64 |     x: np.ndarray,
 65 |     preemph: float,
 66 |     sample_rate: int,
 67 |     n_mels: int,
 68 |     n_fft: int,
 69 |     hop_length: int,
 70 |     win_length: int,
 71 |     f_min: int,
 72 |     center: bool
 73 | ) -> np.ndarray:
 74 |     """Create a log Mel spectrogram from a raw audio signal."""
 75 |     x = lfilter([1, -preemph], [1], x)
 76 |     magnitude = np.abs(
 77 |         librosa.stft(x, n_fft=n_fft, hop_length=hop_length, win_length=win_length, center=center)
 78 |     )
 79 |     mel_fb = librosa.filters.mel(
 80 |         sample_rate, n_fft, n_mels=n_mels, fmin=f_min
 81 |     )
 82 |     mel_spec = np.dot(mel_fb, magnitude)
 83 |     log_mel_spec = np.log(mel_spec + 1e-9)
 84 |     return log_mel_spec.T
 85 | 
 86 | 
 87 | def plot_mel(gt_mel, predicted_mel=None, filename="mel.png"):
 88 |     if predicted_mel is not None:
 89 |         fig, axes = plt.subplots(2, 1, squeeze=False, figsize=(10, 10))
 90 |     else:
 91 |         fig, axes = plt.subplots(1, 1, squeeze=False, figsize=(10, 10))
 92 | 
 93 |     axes[0][0].imshow(gt_mel.detach().cpu().numpy().T, origin="lower")
 94 |     axes[0][0].set_aspect(1, adjustable="box")
 95 |     axes[0][0].set_ylim(1.0, 80)
 96 |     axes[0][0].set_title("ground-truth mel-spectrogram", fontsize="medium")
 97 |     axes[0][0].tick_params(labelsize="x-small", left=False, labelleft=False)
 98 | 
 99 |     if predicted_mel is not None:
100 |         axes[1][0].imshow(predicted_mel.detach().cpu().numpy(), origin="lower")
101 |         axes[1][0].set_aspect(1.0, adjustable="box")
102 |         axes[1][0].set_ylim(0, 80)
103 |         axes[1][0].set_title("predicted mel-spectrogram", fontsize="medium")
104 |         axes[1][0].tick_params(labelsize="x-small", left=False, labelleft=False)
105 | 
106 |     plt.tight_layout()
107 |     plt.savefig(filename)
108 |     plt.close()
109 | 
110 | 
111 | def plot_attn(attn, filename="attn.png", save=True):
112 |     fig, axes = plt.subplots(len(attn), 1, squeeze=False, figsize=(10, 10))
113 | 
114 |     for i, layer_attn in enumerate(attn):
115 |         axes[i][0].imshow(attn[i][0].detach().cpu().numpy(), origin="lower")
116 |         axes[i][0].set_title("layer {}".format(i), fontsize="medium")
117 |         axes[i][0].tick_params(labelsize="x-small")
118 |         axes[i][0].set_xlabel("target")
119 |         axes[i][0].set_ylabel("source")
120 | 
121 |     plt.tight_layout()
122 |     if save:
123 |         plt.savefig(filename)
124 |     if not save:
125 |         return fig
126 | 


--------------------------------------------------------------------------------
/convert_batch.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | """Convert multiple pairs."""
  3 | 
  4 | import warnings
  5 | from pathlib import Path
  6 | from functools import partial
  7 | from multiprocessing import Pool, cpu_count
  8 | 
  9 | import yaml
 10 | import torch
 11 | import numpy as np
 12 | import soundfile as sf
 13 | from argparse import ArgumentParser
 14 | from tqdm import tqdm
 15 | 
 16 | from data import load_wav, log_mel_spectrogram, plot_mel, plot_attn
 17 | from data.feature_extract import FeatureExtractor
 18 | from models import load_pretrained_wav2vec
 19 | 
 20 | 
 21 | def parse_args():
 22 |     """Parse command-line arguments."""
 23 |     parser = ArgumentParser()
 24 |     parser.add_argument("info_path", type=str)
 25 |     parser.add_argument("output_dir", type=str, default=".")
 26 |     parser.add_argument("-c", "--ckpt_path",
 27 |                         default="checkpoints/cpc-cpc.pt")
 28 |     parser.add_argument("-s", "--src_feat_name", default="cpc")
 29 |     parser.add_argument("-r", "--ref_feat_name", default="cpc")
 30 |     parser.add_argument("-w", "--wav2vec_path",
 31 |                         default="checkpoints/wav2vec_small.pt")
 32 |     parser.add_argument("-v", "--vocoder_path",
 33 |                         default="checkpoints/vocoder.pt")
 34 | 
 35 |     parser.add_argument("--sample_rate", type=int, default=16000)
 36 | 
 37 |     return vars(parser.parse_args())
 38 | 
 39 | 
 40 | def main(
 41 |     info_path,
 42 |     output_dir,
 43 |     ckpt_path,
 44 |     src_feat_name,
 45 |     ref_feat_name,
 46 |     wav2vec_path,
 47 |     vocoder_path,
 48 |     sample_rate,
 49 |     **kwargs,
 50 | ):
 51 |     """Main function."""
 52 | 
 53 |     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 54 |     src_feat_model = FeatureExtractor(src_feat_name, wav2vec_path, device)
 55 | 
 56 |     ref_feat_model = FeatureExtractor(ref_feat_name, wav2vec_path, device)
 57 | 
 58 |     print(f"[INFO] {src_feat_name} is loaded")
 59 | 
 60 |     model = torch.jit.load(ckpt_path).to(device).eval()
 61 |     print("[INFO] FragmentVC is loaded from", ckpt_path)
 62 | 
 63 |     vocoder = torch.jit.load(vocoder_path).to(device).eval()
 64 |     print("[INFO] Vocoder is loaded from", vocoder_path)
 65 | 
 66 |     path2wav = partial(load_wav, sample_rate=sample_rate, trim=True)
 67 | 
 68 |     with open(info_path) as f:
 69 |         infos = yaml.load(f, Loader=yaml.FullLoader)
 70 | 
 71 |     out_mels = []
 72 |     attns = []
 73 |     with Pool(cpu_count()) as pool:
 74 |         for pair_name, pair in tqdm(infos.items()):
 75 |             src_wav = load_wav(pair["source"], sample_rate, trim=True)
 76 |             src_wav = torch.FloatTensor(src_wav).to(device)
 77 | 
 78 |             tgt_wavs = pool.map(path2wav, pair["target"])
 79 |             tgt_wavs = [torch.FloatTensor(tgt_wav).to(device)
 80 |                         for tgt_wav in tgt_wavs]
 81 | 
 82 |             with torch.no_grad():
 83 |                 tgt_mels = ref_feat_model.get_feature(tgt_wavs)
 84 |                 src_mel = (ref_feat_model.get_feature([src_wav])[0].transpose(
 85 |                         0, 1).unsqueeze(0).to(device))
 86 |                 tgt_mels = [tgt_mel.cpu() for tgt_mel in tgt_mels]
 87 |                 tgt_mel = np.concatenate(tgt_mels, axis=0)
 88 |                 tgt_mel = torch.FloatTensor(tgt_mel.T).unsqueeze(0).to(device)
 89 |                 src_feat = src_feat_model.get_feature([src_wav])[
 90 |                     0].unsqueeze(0)
 91 |                 out_mel, attn = model(src_feat, tgt_mel)
 92 | 
 93 |                 out_mel = out_mel.transpose(1, 2).squeeze(0)
 94 |                 out_mels.append(out_mel)
 95 |                 attns.append(attn)
 96 | 
 97 |         # print(f"[INFO] Pair {pair_name} converted")
 98 |     # out_mel: batch_size, time_stamp, mel_dim
 99 |     del model
100 |     del src_feat_model
101 |     del ref_feat_model
102 |     print("[INFO] Generating waveforms...")
103 |     batch_size = 10
104 |     total = len(out_mels)
105 |     out_wavs = []
106 |     pbar = tqdm(total=len(out_mels), ncols=0, unit="wavs")
107 |     with torch.no_grad():
108 |         for i in range(0, total, batch_size):
109 |             out_wavs.extend(vocoder.generate(out_mels[i:i+batch_size]))
110 |             pbar.update(min(batch_size, total))
111 |         if total % batch_size != 0 and total > batch_size:
112 |             out_wavs.extend(vocoder.generate(
113 |                 out_mels[total - total % batch_size:]))
114 |             pbar.update(total % batch_size)
115 |     pbar.close()
116 | 
117 |     print("[INFO] Waveforms generated")
118 | 
119 |     out_dir = Path(output_dir)
120 |     out_dir.mkdir(parents=True, exist_ok=True)
121 | 
122 |     for pair_name, out_mel, out_wav, attn in tqdm(zip(
123 |         infos.keys(), out_mels, out_wavs, attns
124 |     )):
125 |         out_wav = out_wav.cpu().numpy()
126 |         out_path = Path(out_dir, pair_name)
127 | 
128 |         plot_mel(out_mel, filename=out_path.with_suffix(".mel.png"))
129 |         plot_attn(attn, filename=out_path.with_suffix(".attn.png"))
130 |         sf.write(out_path.with_suffix(".wav"), out_wav, sample_rate)
131 | 
132 | 
133 | if __name__ == "__main__":
134 |     warnings.filterwarnings("ignore")
135 |     main(**parse_args())
136 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # S2VC
  2 | 
  3 | Here is the implementation of our paper [S2VC: A Framework for Any-to-Any Voice Conversion with Self-Supervised Pretrained Representations](https://arxiv.org/abs/2104.02901). In this paper, we proposed S2VC which utilizes Self-Supervised pretrained representation to provide the latent phonetic structure of the utterance from the source speaker and the spectral features of the utterance from the target speaker.
  4 | 
  5 | The following is the overall model architecture.
  6 | 
  7 | ![Model architecture](docs/imgs/model.png)
  8 | 
  9 | For the audio samples, please refer to our [demo page](https://howard1337.github.io/S2VC/).
 10 | 
 11 | ## Usage
 12 | 
 13 | You can download the pretrained model as well as the vocoder following the link under **Releases** section on the sidebar.
 14 | 
 15 | The whole project was developed using Python 3.8, torch 1.7.1, and the pretrained model, as well as the vocoder, were turned to [TorchScript](https://pytorch.org/docs/stable/jit.html), so it's not guaranteed to be backward compatible.
 16 | You can install the dependencies with
 17 | 
 18 | ```bash
 19 | pip install -r requirements.txt
 20 | ```
 21 | 
 22 | If you encounter any problems while installing *fairseq*, please refer to [pytorch/fairseq](https://github.com/pytorch/fairseq) for the installation instruction.
 23 | 
 24 | ### Self-Supervised representations
 25 | #### Wav2vec2
 26 | In our implementation, we're using Wav2Vec 2.0 Base w/o finetuning which is trained on LibriSpeech.
 27 | You can download the checkpoint [wav2vec_small.pt](https://dl.fbaipublicfiles.com/fairseq/wav2vec/wav2vec_small.pt) from [pytorch/fairseq](https://github.com/pytorch/fairseq).
 28 | #### APC(Autoregressive Predictive Coding), CPC(Contrastive Predictive Coding)
 29 | These two representations are extracted using this speech toolkit [S3PRL](https://github.com/s3prl/s3prl).
 30 | You can check how to extract various representations from that repo.
 31 | 
 32 | ### Vocoder
 33 | 
 34 | The WaveRNN-based neural vocoder is from [yistLin/universal-vocoder](https://github.com/yistLin/universal-vocoder) which is based on the paper, [Towards achieving robust universal neural vocoding](https://arxiv.org/abs/1811.06292).
 35 | 
 36 | ## Voice conversion with pretrained models
 37 | 
 38 | You can convert an utterance from the source speaker with multiple utterances from the target speaker by preparing a conversion pairs information file in YAML format, like
 39 | ```YAML
 40 | # pairs_info.yaml
 41 | pair1:
 42 |     source: VCTK-Corpus/wav48/p225/p225_001.wav
 43 |     target:
 44 |         - VCTK-Corpus/wav48/p227/p227_001.wav
 45 | pair2:
 46 |     source: VCTK-Corpus/wav48/p225/p225_001.wav
 47 |     target:
 48 |         - VCTK-Corpus/wav48/p227/p227_002.wav
 49 |         - VCTK-Corpus/wav48/p227/p227_003.wav
 50 |         - VCTK-Corpus/wav48/p227/p227_004.wav
 51 | ```
 52 | 
 53 | And convert multiple pairs at the same time, e.g.
 54 | ```bash
 55 | python convert_batch.py \
 56 |     -w <WAV2VEC_PATH> \
 57 |     -v <VOCODER_PATH> \
 58 |     -c <CHECKPOINT_PATH> \
 59 |     -s <SOURCE_FEATURE_NAME> \
 60 |     -r <REFERENCE_FEATURE_NAME> \
 61 |     pairs_info.yaml \
 62 |     outputs # the output directory of conversion results
 63 | ```
 64 | 
 65 | After the conversion, the output directory, `outputs`, will be containing
 66 | ```text
 67 | pair1.wav
 68 | pair1.mel.png
 69 | pair1.attn.png
 70 | pair2.wav
 71 | pair2.mel.png
 72 | pair2.attn.png
 73 | ```
 74 | 
 75 | ## Train from scratch
 76 | 
 77 | ### Preprocessing
 78 | You can preprocess multiple corpora by passing multiple paths.
 79 | But each path should be the directory that directly contains the speaker directories.
 80 | And you have to specify the feature you want to extract.
 81 | Currently, we support apc, cpc, wav2vec2, and timit_posteriorgram.
 82 | i.e.
 83 | ```bash
 84 | python3 preprocess.py
 85 |     VCTK-Corpus/wav48 \
 86 |     <SECOND_Corpus_PATH> \ # more corpus if you want
 87 |     <FEATURE_NAME> \
 88 |     <WAV2VEC_PATH> \
 89 |     processed/<FEATURE_NAME>  # the output directory of preprocessed features
 90 | ```
 91 | After preprocessing, the output directory will be containing:
 92 | ```text
 93 | metadata.json
 94 | utterance-000x7gsj.tar
 95 | utterance-00wq7b0f.tar
 96 | utterance-01lpqlnr.tar
 97 | ...
 98 | ```
 99 | 
100 | You may need to preprocess multiple times for different features.
101 | i.e.
102 | ```bash
103 | python3 preprocess.py
104 |     VCTK-Corpus/wav48 apc <WAV2VEC_PATH> processed/apc
105 | python3 preprocess.py
106 |     VCTK-Corpus/wav48 cpc <WAV2VEC_PATH> processed/cpc
107 |     ...
108 | ```
109 | 
110 | Then merge the metadata of different features.
111 | 
112 | i.e.
113 | ```bash
114 | python3 merger.py processed
115 | ```
116 | 
117 | 
118 | ### Training
119 | 
120 | ```bash
121 | python train.py processed
122 |     --save_dir ./ckpts \
123 |     -s <SOURCE_FEATURE_NAME> \
124 |     -r <REFERENCE_FEATURE_NAME>
125 | ```
126 | 
127 | 
128 | You can further specify `--preload` for preloading all training data into RAM to boost training speed.
129 | If `--comment <COMMENT>` is specified, e.g. `--comment CPC-CPC`, the training logs will be placed under a newly created directory like, `logs/2020-02-02_12:34:56_CPC-CPC`, otherwise there won't be any logging.
130 | For more details, you can refer to the usage by `python train.py -h`.
131 | 


--------------------------------------------------------------------------------
/data/intra_speaker_dataset.py:
--------------------------------------------------------------------------------
  1 | """Dataset for reconstruction scheme."""
  2 | 
  3 | import json
  4 | import random
  5 | from pathlib import Path
  6 | from copy import deepcopy
  7 | from concurrent.futures import ThreadPoolExecutor
  8 | 
  9 | import torch
 10 | from tqdm import tqdm
 11 | from torch.utils.data import Dataset
 12 | from torch.nn.utils.rnn import pad_sequence
 13 | 
 14 | import sox
 15 | 
 16 | from .utils import load_wav, log_mel_spectrogram
 17 | 
 18 | 
 19 | 
 20 | class IntraSpeakerDataset(Dataset):
 21 |     """Dataset for reconstruction scheme.
 22 | 
 23 |     Returns:
 24 |         speaker_id: speaker id number.
 25 |         feat: Wav2Vec feature tensor.
 26 |         mel: log mel spectrogram tensor.
 27 |     """
 28 | 
 29 |     def __init__(self, data_dir, metadata_path, src_feat, ref_feat, n_samples=5, pre_load=False, training=True):
 30 |         with open(metadata_path, "r") as f:
 31 |             metadata = json.load(f)
 32 | 
 33 |         executor = ThreadPoolExecutor(max_workers=4)
 34 |         futures = []
 35 |         for speaker_name, utterances in metadata.items():
 36 |             for utterance in utterances:
 37 |                 futures.append(
 38 |                     executor.submit(
 39 |                         _process_data,
 40 |                         speaker_name,
 41 |                         data_dir,
 42 |                         utterance,
 43 |                         pre_load,
 44 |                         src_feat,
 45 |                         ref_feat,
 46 |                     )
 47 |                 )
 48 | 
 49 |         self.data = []
 50 |         self.speaker_to_indices = {}
 51 |         for i, future in enumerate(tqdm(futures, ncols=0)):
 52 |             result = future.result()
 53 |             speaker_name = result[0]
 54 |             self.data.append(result)
 55 |             if speaker_name not in self.speaker_to_indices:
 56 |                 self.speaker_to_indices[speaker_name] = [i]
 57 |             else:
 58 |                 self.speaker_to_indices[speaker_name].append(i)
 59 | 
 60 | 
 61 |         self.data_dir = Path(data_dir)
 62 |         self.n_samples = n_samples
 63 |         self.pre_load = pre_load
 64 |         self.training = training
 65 |         self.src_feat = src_feat
 66 |         self.ref_feat = ref_feat
 67 |         self.src_dim = -1
 68 |         self.ref_dim = -1
 69 |         self.tgt_dim = -1
 70 | 
 71 |     def __len__(self):
 72 |         return len(self.data)
 73 | 
 74 |     def _get_data(self, index):
 75 |         if self.pre_load:
 76 |             speaker_name, content_emb, target_emb, target_mel = self.data[index]
 77 |         else:
 78 |             speaker_name, content_emb, target_emb, target_mel = _load_data(*self.data[index])
 79 |         self.src_dim = content_emb.shape[1]
 80 |         self.ref_dim = target_emb.shape[1]
 81 |         self.tgt_dim = target_mel.shape[1]
 82 | 
 83 |         return speaker_name, content_emb, target_emb, target_mel
 84 | 
 85 |     def __getitem__(self, index):
 86 |         speaker_name, content_emb, target_emb, target_mel = self._get_data(index)
 87 |         return content_emb, target_emb, target_mel
 88 |     
 89 |     def get_feat_dim(self):
 90 |         self._get_data(0)
 91 |         return self.src_dim, self.ref_dim, self.tgt_dim
 92 | 
 93 | 
 94 | def _process_data(speaker_name, data_dir, feature, load, src_feat, ref_feat):
 95 |     _, src_feature_path, ref_feature_path = feature["audio_path"], feature[src_feat], feature[ref_feat]
 96 |     if load:
 97 |         return _load_data(speaker_name, data_dir, src_feature_path, ref_feature_path)
 98 |     else:
 99 |         return speaker_name, data_dir, src_feature_path, ref_feature_path
100 | 
101 | 
102 | def _load_data(speaker_name, data_dir, src_feature_path, ref_feature_path):
103 |     src_feature = torch.load(Path(data_dir, src_feature_path), 'cpu')
104 |     ref_feature = torch.load(Path(data_dir, ref_feature_path), 'cpu')
105 |     content_emb = src_feature["feat"].detach().cpu()
106 |     target_emb = ref_feature["feat"].detach().cpu()
107 |     target_mel = src_feature["mel"].detach().cpu()
108 |     return speaker_name, content_emb, target_emb, target_mel
109 | 
110 | 
111 | def collate_batch(batch):
112 |     """Collate a batch of data."""
113 |     srcs, tgts, tgt_mels = zip(*batch)
114 | 
115 |     src_lens = [len(src) for src in srcs]
116 |     tgt_lens = [len(tgt) for tgt in tgts]
117 |     tgt_mel_lens = [len(tgt_mel) for tgt_mel in tgt_mels]
118 | 
119 |     overlap_lens = [
120 |         min(src_len, tgt_mel_len) for src_len, tgt_mel_len in zip(src_lens, tgt_mel_lens)
121 |     ]
122 | 
123 |     srcs = pad_sequence(srcs, batch_first=True)
124 | 
125 |     src_masks = [torch.arange(srcs.size(1)) >= src_len for src_len in src_lens]
126 |     src_masks = torch.stack(src_masks)
127 | 
128 |     tgts = pad_sequence(tgts, batch_first=True, padding_value=-20)
129 |     tgts = tgts.transpose(1, 2)  # (batch, mel_dim, max_tgt_len)
130 | 
131 |     tgt_masks = [torch.arange(tgts.size(2)) >= tgt_len for tgt_len in tgt_lens]
132 |     tgt_masks = torch.stack(tgt_masks)  # (batch, max_tgt_len)
133 | 
134 |     tgt_mels = pad_sequence(tgt_mels, batch_first=True, padding_value=-20)
135 |     tgt_mels = tgt_mels.transpose(1, 2)  # (batch, mel_dim, max_tgt_len)
136 | 
137 |     return srcs, src_masks, tgts, tgt_masks, tgt_mels, overlap_lens
138 | 


--------------------------------------------------------------------------------
/models/convolutional_transformer.py:
--------------------------------------------------------------------------------
  1 | """Convolutional transsformer"""
  2 | 
  3 | from typing import Optional, Tuple
  4 | 
  5 | import torch.nn.functional as F
  6 | from torch import Tensor, bmm
  7 | from torch.nn import (
  8 |     Module,
  9 |     Dropout,
 10 |     LayerNorm,
 11 |     Conv1d,
 12 |     MultiheadAttention,
 13 |     Sequential,
 14 |     Linear,
 15 |     ReLU,
 16 |     Sigmoid,
 17 |     InstanceNorm1d,
 18 | )
 19 | from torch.nn.modules.linear import _LinearWithBias
 20 | 
 21 | 
 22 | class Smoother(Module):
 23 |     """Convolutional Transformer Encoder Layer"""
 24 | 
 25 |     def __init__(self, d_model: int, nhead: int, d_hid: int, dropout=0.1):
 26 |         super(Smoother, self).__init__()
 27 |         self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
 28 | 
 29 |         self.conv1 = Conv1d(d_model, d_hid, 9, padding=4)
 30 |         self.conv2 = Conv1d(d_hid, d_model, 1, padding=0)
 31 | 
 32 |         self.norm1 = LayerNorm(d_model)
 33 |         self.norm2 = LayerNorm(d_model)
 34 |         self.dropout1 = Dropout(dropout)
 35 |         self.dropout2 = Dropout(dropout)
 36 | 
 37 |     def forward(
 38 |         self,
 39 |         src: Tensor,
 40 |         src_mask: Optional[Tensor] = None,
 41 |         src_key_padding_mask: Optional[Tensor] = None,
 42 |     ) -> Tensor:
 43 |         # multi-head self attention
 44 |         src2 = self.self_attn(
 45 |             src, src, src, attn_mask=src_mask, key_padding_mask=src_key_padding_mask
 46 |         )[0]
 47 | 
 48 |         # add & norm
 49 |         src = src + self.dropout1(src2)
 50 |         src = self.norm1(src)
 51 | 
 52 |         # conv1d
 53 |         src2 = src.transpose(0, 1).transpose(1, 2)
 54 |         src2 = self.conv2(F.relu(self.conv1(src2)))
 55 |         src2 = src2.transpose(1, 2).transpose(0, 1)
 56 | 
 57 |         # add & norm
 58 |         src = src + self.dropout2(src2)
 59 |         src = self.norm2(src)
 60 |         return src
 61 | 
 62 | 
 63 | class Extractor(Module):
 64 |     """Convolutional Transformer Decoder Layer"""
 65 | 
 66 |     def __init__(
 67 |         self,
 68 |         d_model: int,
 69 |         nhead: int,
 70 |         d_hid: int,
 71 |         bottleneck_dim: int,
 72 |         dropout=0.1,
 73 |         no_residual=False,
 74 |         bottleneck=False,
 75 |     ):
 76 |         super(Extractor, self).__init__()
 77 |         self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
 78 |         self.cross_attn = MultiheadAttention(bottleneck_dim, nhead, dropout=dropout)
 79 |         self.out_proj = _LinearWithBias(d_model, d_model)
 80 | 
 81 |         self.conv1 = Conv1d(d_model, d_hid, 9, padding=4)
 82 |         self.conv2 = Conv1d(d_hid, d_model, 1, padding=0)
 83 |         
 84 |         self.bottleneck = bottleneck
 85 |         self.tgt_bottleneck = Sequential(
 86 |             Linear(d_model, d_model),
 87 |             ReLU(),
 88 |             # InstanceNorm1d(d_model),
 89 |             Linear(d_model, bottleneck_dim),
 90 |         )
 91 | 
 92 |         self.memory_bottleneck = Sequential(
 93 |             Linear(d_model, d_model),
 94 |             ReLU(),
 95 |             # InstanceNorm1d(d_model),
 96 |             Linear(d_model, bottleneck_dim),
 97 |         )
 98 | 
 99 |         self.norm1 = LayerNorm(d_model)
100 |         self.norm2 = LayerNorm(d_model)
101 |         self.norm3 = LayerNorm(d_model)
102 |         self.dropout1 = Dropout(dropout)
103 |         self.dropout2 = Dropout(dropout)
104 |         self.dropout3 = Dropout(dropout)
105 | 
106 |         self.no_residual = no_residual
107 | 
108 |     def forward(
109 |         self,
110 |         tgt: Tensor,
111 |         memory: Tensor,
112 |         tgt_mask: Optional[Tensor] = None,
113 |         memory_mask: Optional[Tensor] = None,
114 |         tgt_key_padding_mask: Optional[Tensor] = None,
115 |         memory_key_padding_mask: Optional[Tensor] = None,
116 |     ) -> Tuple[Tensor, Optional[Tensor]]:
117 |         # multi-head self attention
118 |         tgt2 = self.self_attn(
119 |             tgt, tgt, tgt, attn_mask=tgt_mask, key_padding_mask=tgt_key_padding_mask
120 |         )[0]
121 | 
122 |         # add & norm
123 |         tgt = tgt + self.dropout1(tgt2)
124 |         tgt = self.norm1(tgt)
125 | 
126 |         # bottleneck feature of target and references
127 |         if self.bottleneck:
128 |             tgt_compat = self.tgt_bottleneck(tgt)
129 |             memory_compact = self.memory_bottleneck(memory)
130 |         else:
131 |             tgt_compat = tgt
132 |             memory_compact = memory
133 | 
134 |         # multi-head cross attention
135 |         tgt2, attn = self.cross_attn(
136 |             tgt_compat,
137 |             memory_compact,
138 |             memory_compact,
139 |             attn_mask=memory_mask,
140 |             key_padding_mask=memory_key_padding_mask,
141 |         )
142 |         
143 |         if self.bottleneck and attn is not None:
144 |             memory = (
145 |                 memory.contiguous()
146 |                 .view(memory.size(0), -1, memory.size(-1))
147 |                 .transpose(0, 1)
148 |             )
149 |             tgt2 = bmm(attn, memory)
150 |             tgt2 = (
151 |                 tgt2.transpose(0, 1)
152 |                 .contiguous()
153 |                 .view(-1, memory.size(0), memory.size(2))
154 |             )
155 |             tgt2 = F.linear(tgt2, self.out_proj.weight, self.out_proj.bias)
156 |         # add & norm
157 |         if self.no_residual:
158 |             tgt = self.dropout2(tgt2)
159 |         else:
160 |             tgt = tgt + self.dropout2(tgt2)
161 |         tgt = self.norm2(tgt)
162 | 
163 |         # conv1d
164 |         tgt2 = tgt.transpose(0, 1).transpose(1, 2)
165 |         tgt2 = self.conv2(F.relu(self.conv1(tgt2)))
166 |         tgt2 = tgt2.transpose(1, 2).transpose(0, 1)
167 | 
168 |         # add & norm
169 |         tgt = tgt + self.dropout3(tgt2)
170 |         tgt = self.norm3(tgt)
171 | 
172 |         return tgt, attn
173 | 


--------------------------------------------------------------------------------
/models/model.py:
--------------------------------------------------------------------------------
  1 | """FragmentVC model architecture."""
  2 | 
  3 | from typing import Tuple, List, Optional
  4 | 
  5 | import torch
  6 | import torch.nn as nn
  7 | import torch.nn.functional as F
  8 | from torch import Tensor
  9 | 
 10 | from .convolutional_transformer import Smoother, Extractor
 11 | 
 12 | class S2VC(nn.Module):
 13 |     """
 14 |     FragmentVC uses Wav2Vec feature of the source speaker to query and attend
 15 |     on mel spectrogram of the target speaker.
 16 |     """
 17 | 
 18 |     def __init__(self, input_dim, ref_dim, d_model=512):
 19 |         super().__init__()
 20 |         self.unet = UnetBlock(d_model, input_dim, ref_dim)
 21 | 
 22 |         self.smoothers = nn.TransformerEncoder(Smoother(d_model, 2, 1024), num_layers=3)
 23 | 
 24 |         self.mel_linear = nn.Linear(d_model, 80)
 25 | 
 26 |         self.post_net = nn.Sequential(
 27 |             nn.Conv1d(80, 512, kernel_size=5, padding=2),
 28 |             nn.BatchNorm1d(512),
 29 |             nn.Tanh(),
 30 |             nn.Dropout(0.5),
 31 |             nn.Conv1d(512, 512, kernel_size=5, padding=2),
 32 |             nn.BatchNorm1d(512),
 33 |             nn.Tanh(),
 34 |             nn.Dropout(0.5),
 35 |             nn.Conv1d(512, 512, kernel_size=5, padding=2),
 36 |             nn.BatchNorm1d(512),
 37 |             nn.Tanh(),
 38 |             nn.Dropout(0.5),
 39 |             nn.Conv1d(512, 512, kernel_size=5, padding=2),
 40 |             nn.BatchNorm1d(512),
 41 |             nn.Tanh(),
 42 |             nn.Dropout(0.5),
 43 |             nn.Conv1d(512, 80, kernel_size=5, padding=2),
 44 |             nn.BatchNorm1d(80),
 45 |             nn.Dropout(0.5),
 46 |         )
 47 | 
 48 |     def forward(
 49 |         self,
 50 |         srcs: Tensor,
 51 |         refs: Tensor,
 52 |         src_masks: Optional[Tensor] = None,
 53 |         ref_masks: Optional[Tensor] = None,
 54 |     ) -> Tuple[Tensor, List[Optional[Tensor]]]:
 55 |         """Forward function.
 56 | 
 57 |         Args:
 58 |             srcs: (batch, src_len, 768)
 59 |             src_masks: (batch, src_len)
 60 |             refs: (batch, 80, ref_len)
 61 |             ref_masks: (batch, ref_len)
 62 |         """
 63 |         # out: (src_len, batch, d_model)
 64 |         out, attns = self.unet(srcs, refs, src_masks=src_masks, ref_masks=ref_masks)
 65 | 
 66 |         # out: (src_len, batch, d_model)
 67 |         out = self.smoothers(out, src_key_padding_mask=src_masks)
 68 | 
 69 |         # out: (src_len, batch, 80)
 70 |         out = self.mel_linear(out)
 71 | 
 72 |         # out: (batch, 80, src_len)
 73 |         out = out.transpose(1, 0).transpose(2, 1)
 74 |         refined = self.post_net(out)
 75 |         out = out + refined
 76 | 
 77 |         # out: (batch, 80, src_len)
 78 |         return out, attns
 79 | 
 80 | 
 81 | 
 82 | class SelfAttentionPooling(nn.Module):
 83 |   """
 84 |   Implementation of SelfAttentionPooling from https://gist.github.com/pohanchi/c77f6dbfbcbc21c5215acde4f62e4362
 85 |   Original Paper: Self-Attention Encoding and Pooling for Speaker Recognition
 86 |   https://arxiv.org/pdf/2008.01077v1.pdf
 87 |   """
 88 |   def __init__(self, input_dim: int):
 89 |     super(SelfAttentionPooling, self).__init__()
 90 |     self.W = nn.Linear(input_dim, 1)
 91 |     self.softmax = nn.functional.softmax
 92 | 
 93 |   def forward(self, batch_rep: Tensor, att_mask: Optional[Tensor] = None):
 94 |     """
 95 |       N: batch size, T: sequence length, H: Hidden dimension
 96 |       input:
 97 |         batch_rep : size (N, T, H)
 98 |       attention_weight:
 99 |         att_w : size (N, T, 1)
100 |       return:
101 |         utter_rep: size (N, H)
102 |     """
103 |     att_logits = self.W(batch_rep).squeeze(-1)
104 |     if att_mask is not None:
105 |       att_logits = att_logits.masked_fill(att_mask, 1e-20)
106 |     att_w = self.softmax(att_logits, dim=-1).unsqueeze(-1)
107 |     utter_rep = torch.sum(batch_rep * att_w, dim=1)
108 | 
109 |     return utter_rep
110 | 
111 | class SourceEncoder(nn.Module):
112 |     def __init__(self, d_model: int, input_dim: int):
113 |         super(SourceEncoder, self).__init__()
114 |         # encoder_layer = nn.TransformerEncoderLayer(d_model, 2, 1024, 0.1)
115 |         # self.encoder = nn.TransformerEncoder(encoder_layer, 6)
116 |         
117 |         self.lin1 = nn.Linear(input_dim, input_dim)
118 |         self.lin2 = nn.Linear(input_dim,  d_model)
119 |         self.lin3 = nn.Linear(d_model,  d_model)
120 |         self.lin4 = nn.Linear(d_model,  d_model)
121 | 
122 |         self.bn1 = nn.BatchNorm1d(input_dim)
123 |         self.bn2 = nn.BatchNorm1d(d_model)
124 |         self.bn3 = nn.BatchNorm1d(d_model)
125 |         self.bn4 = nn.BatchNorm1d(d_model)
126 | 
127 |         self.dropout1 = nn.Dropout(0.0)
128 |         self.dropout2 = nn.Dropout(0.0)
129 |         self.dropout3 = nn.Dropout(0.0)
130 |         self.dropout4 = nn.Dropout(0.0)
131 | 
132 |         self.SAP = SelfAttentionPooling(d_model)
133 |         self.proj = nn.Linear(d_model, d_model)
134 |         torch.nn.init.xavier_uniform_(
135 |             self.proj.weight,   gain=torch.nn.init.calculate_gain('linear')
136 |         )
137 | 
138 |     def forward(self, srcs: Tensor, refs: Tensor, src_masks: Optional[Tensor] = None, ref_masks: Optional[Tensor] = None):
139 |         tgt = F.relu(self.lin1(srcs)).transpose(1, 2)
140 |         tgt = self.dropout1(self.bn1(tgt)).transpose(1, 2)
141 | 
142 |         tgt = F.relu(self.lin2(tgt)).transpose(1, 2)
143 |         tgt = self.dropout2(self.bn2(tgt)).transpose(1, 2)
144 | 
145 |         tgt = F.relu(self.lin3(tgt)).transpose(1, 2)
146 |         tgt = self.dropout3(self.bn3(tgt)).transpose(1, 2)
147 | 
148 |         tgt = F.relu(self.lin4(tgt)).transpose(1, 2)
149 |         tgt = self.dropout4(self.bn4(tgt)).transpose(1, 2)
150 | 
151 |         spk_embed = F.relu(self.proj(self.SAP(refs.transpose(1, 2), ref_masks))).unsqueeze(1)
152 |         tgt *= spk_embed
153 | 
154 |         # tgt = self.encoder(tgt, src_masks)
155 |         return tgt
156 | 
157 | 
158 | class UnetBlock(nn.Module):
159 |     """Hierarchically attend on references."""
160 | 
161 |     def __init__(self, d_model: int, input_dim: int, ref_dim: int):
162 |         super(UnetBlock, self).__init__()
163 |         self.conv1 = nn.Conv1d(ref_dim, d_model, 3, padding=1, padding_mode="replicate")
164 |         self.conv2 = nn.Conv1d(d_model, d_model, 3, padding=1, padding_mode="replicate")
165 |         self.conv3 = nn.Conv1d(d_model, d_model, 3, padding=1, padding_mode="replicate")
166 | 
167 |         use_bottleneck = True
168 |         bottleneck_dim = 4
169 |         n_head = 2
170 |         self.extractor1 = Extractor(
171 |             d_model, n_head, 1024, bottleneck_dim, no_residual=True, bottleneck=use_bottleneck,
172 |         )
173 | 
174 |         self.src_encoder = SourceEncoder(d_model, input_dim)
175 |     def forward(
176 |         self,
177 |         srcs: Tensor,
178 |         refs: Tensor,
179 |         src_masks: Optional[Tensor] = None,
180 |         ref_masks: Optional[Tensor] = None,
181 |     ) -> Tuple[Tensor, List[Optional[Tensor]]]:
182 |         """Forward function.
183 | 
184 |         Args:
185 |             srcs: (batch, 80, src_len)
186 |             src_masks: (batch, src_len)
187 |             refs: (batch, 80, ref_len)
188 |             ref_masks: (batch, ref_len)
189 |         """
190 | 
191 |         # tgt: (batch, mel_len, bottleneck_dim)
192 |         
193 |         # tgt: (tgt_len, batch, bottleneck_dim)
194 | 
195 |         # ref*: (batch, d_model, mel_len)
196 |         ref1 = self.conv1(refs)
197 |         ref2 = self.conv2(F.relu(ref1))
198 |         ref3 = self.conv3(F.relu(ref2))
199 | 
200 |         tgt = self.src_encoder(srcs, ref3, src_masks, ref_masks)
201 |         tgt = tgt.transpose(0, 1)
202 | 
203 |         # out*: (tgt_len, batch, d_model)
204 |         out, attn1 = self.extractor1(
205 |             tgt,
206 |             ref3.transpose(1, 2).transpose(0, 1),
207 |             tgt_key_padding_mask=src_masks,
208 |             memory_key_padding_mask=ref_masks,
209 |         )
210 |         return out, [attn1]
211 | 
212 | 


--------------------------------------------------------------------------------
/docs/styles.css:
--------------------------------------------------------------------------------
  1 | html {
  2 |     background-color: lightgrey;
  3 |     font-family: sans-serif;
  4 |     -webkit-text-size-adjust: 100%;
  5 |     -ms-text-size-adjust: 100%;
  6 |     margin: 0;
  7 |     padding: 0;
  8 | }
  9 | 
 10 | body {
 11 |     background-color : lightgrey;
 12 |     margin: auto;
 13 |     width: 100%;
 14 |     min-width: 1200px;
 15 |     max-width: 2000px;
 16 |     height: 100%;
 17 |     padding: 0;
 18 | }
 19 | 
 20 | .container{
 21 |     position: relative;
 22 |     /* background:  rgb(22, 38, 67); For browsers that do not support gradients */
 23 |     background: -webkit-linear-gradient(color1, color2); /* For Safari 5.1 to 6.0 /*
 24 |     /* background: -o-linear-gradient(color1, color2); /* For Opera 11.1 to 12.0 */
 25 |     /* background: -moz-linear-gradient(color1, color2); /* For Firefox 3.6 to 15 */
 26 |     /* background: linear-gradient(color1, color2);  /* Standard syntax */
 27 | 	background-size: cover;
 28 |     height: auto;
 29 |     padding: 2%;
 30 | }
 31 | 
 32 | .footer-container{
 33 |     position: relative;
 34 |     background-image: url("../img/pattern2.png") ;
 35 |     background-size: cover;
 36 |     height: auto;
 37 |     padding: 30px 30px;
 38 | }
 39 | 
 40 | a {
 41 |     color: white; 
 42 | }
 43 | 
 44 | #img1{
 45 |     z-index: 100;
 46 |     position: absolute;
 47 |     left: 10%;
 48 |     top: 15%;
 49 |     border-radius: 50%;
 50 |     height: 70%;
 51 |     width: auto;
 52 |     box-shadow: 10px 5px 5px gray;
 53 | }
 54 | 
 55 | #img3{
 56 |     width: 90%;
 57 |     align-content: center;
 58 | 	height: auto;
 59 |     padding: 2%;
 60 | }
 61 | 
 62 | #cat{
 63 |     display: inline-block;
 64 |     float: left;
 65 |     width: 42%;
 66 |     height: auto;
 67 |     padding-left: 5%; 
 68 | }
 69 | 
 70 | #cat_2{
 71 |     display: inline-block;
 72 |     float: right;
 73 |     width: 42%;
 74 |     height: auto;
 75 |     padding-right: 5%;
 76 | }
 77 | 
 78 | #text1{
 79 |     z-index:100;
 80 |     position: relative;
 81 |     color: white;
 82 |     font-size: 40px;
 83 |     font-weight: bold;
 84 |     text-align: center;
 85 |     margin: 2%;
 86 | }
 87 | 
 88 | #intro{
 89 |     z-index:100;
 90 |     color: white;
 91 |     font-size: 22px;
 92 |     text-align: center;
 93 | }
 94 | 
 95 | #text2{
 96 |     color: white;
 97 |     font-size: 30px;
 98 |     font-weight: bold;
 99 |     text-align: center;
100 |     padding: 20px;
101 | }
102 | 
103 | #footnote{
104 | 	color: white;
105 |     font-size: 20px;
106 |     text-align: center;
107 |     padding-bottom: 20px;
108 | }
109 | 
110 | #area1{
111 | 	width:100%;
112 | 	height:100px;
113 | }
114 | 
115 | .clear{
116 |     clear:both;
117 | }
118 | 
119 | .img-circle {
120 |     border-radius: 50%;
121 | }
122 | 
123 | .content-container{
124 |     background-color: white;
125 | 	padding: 40px 40px;
126 | 	text-align: left;
127 | 	font-size: 20px;
128 |     margin-bottom: 30px;
129 |     display: block;
130 | }
131 | 
132 | .content-title{
133 |     font-size: 30px;
134 |     color: rgb(22, 38, 67);
135 |     text-align: center;
136 |     padding-bottom: 20px;
137 |     font-weight: bold;
138 | }
139 | 
140 | nav{
141 |     margin-bottom: 76px;
142 | }
143 | 
144 | .nav-button {
145 |     background-color: #999999;
146 |     width: 50%;
147 |     padding: 10px 0;
148 |     text-decoration: none;
149 |     text-align: center;
150 |     font-size: 20px;
151 |     color: white;
152 |     float: left;
153 |     cursor: pointer;
154 | }
155 | 
156 | .nav-button:hover {
157 |     background-color: #666666;
158 | }
159 | 
160 | .icon {
161 |     background-color: white;
162 |     height: 40px;
163 |     width: 40px;
164 |     padding: 0 0;
165 |     margin-right: 5px;
166 |     display: inline-block;
167 |     position: center;
168 |     cursor: pointer;
169 |     border-radius: 50%;
170 | }
171 | 
172 | .icon-container{
173 | 	text-align:center;
174 |     margin: 10px 10px;
175 | }
176 | 
177 | /* The following is for projects.php*/
178 | .project-container{
179 | 	display: block;
180 | 	height: auto;
181 | 	width: 100%;
182 | 	margin-bottom: 30px;
183 | }
184 | .project-image-container{
185 | 	display: inline-block;
186 | 	vertical-align: top;
187 | 	width: 30%;
188 | 	height: auto;
189 | 	margin-right: 3%;
190 | }
191 | .project-logo-container{
192 | 	display: inline-block;
193 | 	vertical-align: top;
194 | 	width: 4%;
195 | 	height: auto;
196 |     margin-right: 2%; 
197 | }
198 | .project-text-container{
199 | 	display: inline-block;
200 | 	vertical-align: top;
201 | 	width: 60%;
202 | 	height: auto;
203 | 	margin-left: 3%;
204 | }
205 | .project-full-text-container{
206 | 	display: inline-block;
207 | 	vertical-align: top;
208 | 	width: 90%;
209 | 	height: auto;
210 |     margin-left: 2%; 
211 | }
212 | .project-video-container{
213 |     width: 70%;
214 |     height: auto;
215 |     margin-left: 15%;
216 |     margin-right: 15%;
217 |     margin-top: 30px;
218 |     margin-bottom: 30px;
219 |     display: block;
220 | }
221 | .project-year-container{
222 |     background-color: rgb(22, 38, 67); /*#FFC064*/
223 |     padding: 5px;
224 |     color: white;
225 |     font-size: 36px;
226 |     font-weight: bold;
227 |     text-align: center;
228 | }
229 | 
230 | table {
231 |     text-align: center;
232 |     align-content: center;
233 |     vertical-align: middle;
234 |     border-collapse: collapse;
235 |     width: 100%;
236 | }
237 | 
238 | th, td{
239 |     width: 65px;
240 |     padding: 1%;
241 |     text-align: center;
242 |     align-content: center;
243 |     vertical-align: middle;
244 |     border-collapse: collapse;
245 | }
246 | 
247 | .play-button{
248 |     width: 50px; 
249 |     height: auto;
250 |     display: block; 
251 | }
252 | 
253 | .play-button-demo{
254 |     width: 200px; 
255 |     height: auto;
256 |     display: block; 
257 | }
258 | 
259 | /* End here */
260 | 
261 | h1 {
262 |     text-align: center;
263 |     color: rgb(22, 38, 67);
264 | }
265 | 
266 | .icon-container > a {
267 |     color: transparent;
268 | }
269 | 
270 | img {
271 |     display: block;
272 |     width: 100%;
273 |     height: 100%;
274 |     margin-left: auto;
275 |     margin-right: auto;
276 | }
277 | 
278 | hr {
279 |     color: rgb(22, 38, 67);
280 |     size: 10px;
281 | }
282 | 
283 | .option-div{
284 |     font-size: 24px;
285 |     color: rgb(22, 38, 67);
286 |     font-weight: bold;
287 | }
288 | 
289 | .option-div option{
290 |     font-size: 20px;
291 | }
292 | 
293 | .fa-play:before {
294 |     content: "\f04b"
295 | }
296 | 
297 | .fa-pause:before {
298 |     content: "\f04c"
299 | }
300 | 
301 | .fa-stop:before {
302 |     content: "\f04d"
303 | }
304 | 
305 | .fa {
306 |     display: inline-block;
307 |     font: normal normal normal 14px / 1 FontAwesome;
308 |     font-size: inherit;
309 |     text-rendering: auto;
310 |     -webkit-font-smoothing: antialiased;
311 |     -moz-osx-font-smoothing: grayscale
312 | }
313 | 
314 | .btn {
315 |     display: inline-block;
316 |     font-weight: 400;
317 |     text-align: center;
318 |     white-space: nowrap;
319 |     vertical-align: middle;
320 |     -webkit-user-select: none;
321 |     -moz-user-select: none;
322 |     -ms-user-select: none;
323 |     user-select: none;
324 |     border: 1px solid transparent;
325 |     padding: .375rem .75rem;
326 |     font-size: 1rem;
327 |     line-height: 1.5;
328 |     border-radius: .25rem;
329 |     transition: color .15s ease-in-out, background-color .15s ease-in-out, border-color .15s ease-in-out, box-shadow .15s ease-in-out
330 | }
331 | 
332 | @media screen and (prefers-reduced-motion:reduce) {
333 |     .btn {
334 |         transition: none
335 |     }
336 | }
337 | 
338 | .btn:focus, .btn:hover {
339 |     text-decoration: none
340 | }
341 | 
342 | .btn.focus, .btn:focus {
343 |     outline: 0;
344 |     box-shadow: 0 0 0 .2rem rgba(0, 123, 255, .25)
345 | }
346 | 
347 | .btn.disabled, .btn:disabled {
348 |     opacity: .65
349 | }
350 | 
351 | .btn:not(:disabled):not(.disabled) {
352 |     cursor: pointer
353 | }
354 | 
355 | a.btn.disabled, fieldset:disabled a.btn {
356 |     pointer-events: none
357 | }
358 | 
359 | .btn-primary {
360 |     color: #fff;
361 |     background-color: rgb(22, 38, 67);
362 |     border-color: rgb(22, 38, 67)
363 | }
364 | 
365 | .btn-primary:hover {
366 |     color: #fff;
367 |     background-color: rgb(22, 38, 67);
368 |     border-color: rgb(22, 38, 67)
369 | }
370 | 
371 | .btn-primary.focus, .btn-primary:focus {
372 |     box-shadow: 0 0 0 .2rem rgba(22, 38, 67, .5)
373 | }
374 | 
375 | .btn-primary.disabled, .btn-primary:disabled {
376 |     color: #fff;
377 |     background-color: rgb(22, 38, 67);
378 |     border-color: rgb(22, 38, 67)
379 | }
380 | 
381 | .btn-primary:not(:disabled):not(.disabled).active, .btn-primary:not(:disabled):not(.disabled):active,
382 | .show > .btn-primary.dropdown-toggle {
383 |     color: #fff;
384 |     background-color: rgb(22, 38, 67);
385 |     border-color: rgb(22, 38, 67)
386 | }
387 | 
388 | .btn-primary:not(:disabled):not(.disabled).active:focus, .btn-primary:not(:disabled):not(.disabled):active:focus,
389 | .show > .btn-primary.dropdown-toggle:focus {
390 |     box-shadow: 0 0 0 .2rem rgba(22, 38, 67, .5)
391 | }
392 | 
393 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | """Train S2VC model."""
  3 | 
  4 | import argparse
  5 | import datetime
  6 | import random
  7 | from pathlib import Path
  8 | 
  9 | import torch
 10 | import torch.nn as nn
 11 | from torch.optim import AdamW
 12 | from torch.utils.data import DataLoader, random_split
 13 | from torch.utils.tensorboard import SummaryWriter
 14 | from tqdm import tqdm
 15 | import numpy as np
 16 | 
 17 | from data import IntraSpeakerDataset, collate_batch, plot_attn
 18 | from models import S2VC, get_cosine_schedule_with_warmup
 19 | 
 20 | random.seed(42)
 21 | torch.manual_seed(42)
 22 | torch.cuda.manual_seed(42)
 23 | torch.cuda.manual_seed_all(42)
 24 | np.random.seed(42)
 25 | 
 26 | 
 27 | def parse_args():
 28 |     """Parse command-line arguments."""
 29 |     parser = argparse.ArgumentParser()
 30 |     parser.add_argument("data_dir", type=str)
 31 |     parser.add_argument("--save_dir", type=str, default=".")
 32 |     parser.add_argument("--total_steps", type=int, default=250000)
 33 |     parser.add_argument("--warmup_steps", type=int, default=100)
 34 |     parser.add_argument("--valid_steps", type=int, default=1000)
 35 |     parser.add_argument("--log_steps", type=int, default=100)
 36 |     parser.add_argument("--save_steps", type=int, default=10000)
 37 |     parser.add_argument("--n_samples", type=int, default=10)
 38 |     parser.add_argument("--accu_steps", type=int, default=2)
 39 |     parser.add_argument("--batch_size", type=int, default=6)
 40 |     parser.add_argument("--n_workers", type=int, default=8)
 41 |     parser.add_argument('-s', "--src_feat", type=str, default='cpc')
 42 |     parser.add_argument('-r', "--ref_feat", type=str, default='cpc')
 43 |     parser.add_argument("--preload", action="store_true")
 44 |     parser.add_argument("--lr_reduction", action="store_true")
 45 |     parser.add_argument("--comment", type=str)
 46 | 
 47 | 
 48 |     return vars(parser.parse_args())
 49 | 
 50 | 
 51 | def model_fn(batch, model, criterion, device):
 52 |     """Forward a batch through model."""
 53 | 
 54 |     srcs, src_masks, tgts, tgt_masks, tgt_mels, overlap_lens = batch
 55 | 
 56 |     srcs = srcs.to(device)
 57 |     src_masks = src_masks.to(device)
 58 |     tgts = tgts.to(device)
 59 |     tgt_masks = tgt_masks.to(device)
 60 |     tgt_mels = tgt_mels.to(device)
 61 | 
 62 |     refs = tgts
 63 |     ref_masks = tgt_masks
 64 | 
 65 |     outs, attns = model(srcs, refs, src_masks=src_masks, ref_masks=ref_masks)
 66 |             
 67 |     losses = []
 68 |     for out, tgt_mel, attn, overlap_len in zip(outs.unbind(), tgt_mels.unbind(), attns[-1], overlap_lens):
 69 |         loss = criterion(out[:, :overlap_len], tgt_mel[:, :overlap_len])
 70 |         losses.append(loss)
 71 |     try:
 72 |         attns_plot = []
 73 |         for i in range(len(attns)):
 74 |             attns_plot.append(attns[i][0][:overlap_lens[0], :overlap_lens[0]])
 75 |     except:
 76 |         pass
 77 | 
 78 |         
 79 |     return sum(losses) / len(losses), attns_plot
 80 | 
 81 | 
 82 | def valid(dataloader, model, criterion, device):
 83 |     """Validate on validation set."""
 84 | 
 85 |     model.eval()
 86 |     running_loss = 0.0
 87 |     pbar = tqdm(total=len(dataloader.dataset), ncols=0, desc="Valid", unit=" uttr")
 88 | 
 89 |     for i, batch in enumerate(dataloader):
 90 |         with torch.no_grad():
 91 |             loss, attns = model_fn(batch, model, criterion, device)
 92 |             running_loss += loss.item()
 93 | 
 94 |         pbar.update(dataloader.batch_size)
 95 |         pbar.set_postfix(loss=f"{running_loss / (i+1):.2f}")
 96 | 
 97 |     pbar.close()
 98 |     model.train()
 99 | 
100 |     return running_loss / len(dataloader), attns
101 | 
102 | 
103 | def main(
104 |     data_dir,
105 |     save_dir,
106 |     total_steps,
107 |     warmup_steps,
108 |     valid_steps,
109 |     log_steps,
110 |     save_steps,
111 |     n_samples,
112 |     accu_steps,
113 |     batch_size,
114 |     n_workers,
115 |     src_feat,
116 |     ref_feat,
117 |     preload,
118 |     lr_reduction,
119 |     comment,
120 | ):
121 |     """Main function."""
122 | 
123 |     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
124 | 
125 |     metadata_path = Path(data_dir) / "metadata.json"
126 |     
127 |     dataset = IntraSpeakerDataset(
128 |         data_dir, metadata_path, src_feat, ref_feat, n_samples, preload
129 |     )
130 |     input_dim, ref_dim, tgt_dim = dataset.get_feat_dim()
131 |     lengths = [trainlen := int(0.9 * len(dataset)), len(dataset) - trainlen]
132 |     trainset, validset = random_split(dataset, lengths)
133 |     print(f'Input dim: {input_dim}, Reference dim: {ref_dim}, Target dim: {tgt_dim}')
134 |     model = S2VC(input_dim, ref_dim).to(device)
135 |     model = torch.jit.script(model)
136 | 
137 |     train_loader = DataLoader(
138 |         trainset,
139 |         batch_size=batch_size,
140 |         shuffle=True,
141 |         drop_last=True,
142 |         num_workers=n_workers,
143 |         pin_memory=True,
144 |         collate_fn=collate_batch,
145 |     )
146 |     valid_loader = DataLoader(
147 |         validset,
148 |         batch_size=batch_size * accu_steps,
149 |         num_workers=n_workers,
150 |         drop_last=True,
151 |         pin_memory=True,
152 |         # shuffle to make the plot on tensorboard differenct
153 |         shuffle=True,
154 |         collate_fn=collate_batch,
155 |     )
156 |     train_iterator = iter(train_loader)
157 | 
158 |     if comment is not None:
159 |         log_dir = "logs/"
160 |         log_dir += datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
161 |         log_dir += "_" + comment
162 |         writer = SummaryWriter(log_dir)
163 | 
164 |     save_dir_path = Path(save_dir)
165 |     save_dir_path.mkdir(parents=True, exist_ok=True)
166 | 
167 | 
168 |     learning_rate = 5e-5
169 |     criterion = nn.L1Loss()
170 |     optimizer = AdamW(model.parameters(), lr=learning_rate)
171 |     scheduler = get_cosine_schedule_with_warmup(optimizer, warmup_steps, total_steps)
172 | 
173 |     best_loss = float("inf")
174 |     best_state_dict = None
175 | 
176 |     pbar = tqdm(total=valid_steps, ncols=0, desc="Train", unit=" step")
177 | 
178 |     for step in range(total_steps):
179 |         if step == 40002:
180 |             file = open('completed.txt', 'a')
181 |             print(f'{comment} completed', file=file)
182 |             break
183 |         batch_loss = 0.0
184 | 
185 |         for _ in range(accu_steps):
186 |             try:
187 |                 batch = next(train_iterator)
188 |             except StopIteration:
189 |                 train_iterator = iter(train_loader)
190 |                 batch = next(train_iterator)
191 | 
192 |             loss, attns = model_fn(batch, model, criterion, device)
193 |             loss = loss / accu_steps
194 |             batch_loss += loss.item()
195 |             loss.backward()
196 | 
197 |         optimizer.step()
198 |         scheduler.step()
199 |         optimizer.zero_grad()
200 | 
201 |         pbar.update()
202 |         pbar.set_postfix(loss=f"{batch_loss:.2f}", step=step + 1)
203 | 
204 |         if step % log_steps == 0 and comment is not None:
205 |             writer.add_scalar("Loss/train", batch_loss, step)
206 |             try:
207 |                 attn = [attns[i].unsqueeze(0) for i in range(len(attns))]
208 |                 figure = plot_attn(attn, save=False)
209 |                 writer.add_figure(f"Image/Train-Attentions.png", figure, step + 1)
210 |             except:
211 |                 pass
212 | 
213 |         if (step + 1) % valid_steps == 0:
214 |             pbar.close()
215 | 
216 |             valid_loss, attns = valid(valid_loader, model, criterion, device)
217 | 
218 |             if comment is not None:
219 |                 writer.add_scalar("Loss/valid", valid_loss, step + 1)
220 |                 try:
221 |                     attn = [attns[i].unsqueeze(0) for i in range(len(attns))]
222 |                     figure = plot_attn(attn, save=False)
223 |                     writer.add_figure(f"Image/Valid-Attentions.png", figure, step + 1)
224 |                 except:
225 |                     pass
226 | 
227 |             if valid_loss < best_loss:
228 |                 best_loss = valid_loss
229 |                 best_state_dict = model.state_dict()
230 | 
231 |             pbar = tqdm(total=valid_steps, ncols=0, desc="Train", unit=" step")
232 | 
233 |         if (step + 1) % save_steps == 0 and best_state_dict is not None:
234 |             loss_str = f"{best_loss:.4f}".replace(".", "dot")
235 |             best_ckpt_name = f"retriever-best-loss{loss_str}.pt"
236 | 
237 |             loss_str = f"{valid_loss:.4f}".replace(".", "dot")
238 |             curr_ckpt_name = f"retriever-step{step+1}-loss{loss_str}.pt"
239 | 
240 |             current_state_dict = model.state_dict()
241 |             model.cpu()
242 | 
243 |             model.load_state_dict(best_state_dict)
244 |             model.save(str(save_dir_path / best_ckpt_name))
245 | 
246 |             model.load_state_dict(current_state_dict)
247 |             model.save(str(save_dir_path / curr_ckpt_name))
248 | 
249 |             model.to(device)
250 |             pbar.write(f"Step {step + 1}, best model saved. (loss={best_loss:.4f})")
251 | 
252 |         
253 |     pbar.close()
254 | 
255 | 
256 | if __name__ == "__main__":
257 |     main(**parse_args())
258 | 


--------------------------------------------------------------------------------
/docs/index.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html>
  3 | 
  4 | <head>
  5 |     <meta charset="UTF-8">
  6 |     <title>S2VC: A Framework for Any-to-Any Voice Conversion with Self-Supervised Pretrained Representations</title>
  7 |     <link rel="stylesheet" type="text/css" href="styles.css">
  8 |     <script src="jquery-3.5.js"></script>
  9 |     <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
 10 |     <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.5.1/css/bootstrap.min.css"
 11 |         integrity="sha384-VCmXjywReHh4PwowAiWNagnWcLhlEJLA5buUprzK8rxFgeH0kww/aWY76TfkUoSX" crossorigin="anonymous">
 12 |     <link rel="apple-touch-icon" sizes="57x57" href="favicons/apple-icon-57x57.png">
 13 |     <link rel="apple-touch-icon" sizes="60x60" href="favicons/apple-icon-60x60.png">
 14 |     <link rel="apple-touch-icon" sizes="72x72" href="favicons/apple-icon-72x72.png">
 15 |     <link rel="apple-touch-icon" sizes="76x76" href="favicons/apple-icon-76x76.png">
 16 |     <link rel="apple-touch-icon" sizes="114x114" href="favicons/apple-icon-114x114.png">
 17 |     <link rel="apple-touch-icon" sizes="120x120" href="favicons/apple-icon-120x120.png">
 18 |     <link rel="apple-touch-icon" sizes="144x144" href="favicons/apple-icon-144x144.png">
 19 |     <link rel="apple-touch-icon" sizes="152x152" href="favicons/apple-icon-152x152.png">
 20 |     <link rel="apple-touch-icon" sizes="180x180" href="favicons/apple-icon-180x180.png">
 21 |     <link rel="icon" type="image/png" sizes="192x192" href="favicons/android-icon-192x192.png">
 22 |     <link rel="icon" type="image/png" sizes="32x32" href="favicons/favicon-32x32.png">
 23 |     <link rel="icon" type="image/png" sizes="96x96" href="favicons/favicon-96x96.png">
 24 |     <link rel="icon" type="image/png" sizes="16x16" href="favicons/favicon-16x16.png">
 25 |     <link rel="manifest" href="favicons/manifest.json">
 26 |     <meta name="msapplication-TileColor" content="#ffffff">
 27 |     <meta name="msapplication-TileImage" content="favicons/ms-icon-144x144.png">
 28 |     <meta name="theme-color" content="#ffffff">
 29 | </head>
 30 | 
 31 | <body>
 32 |     <div class="jumbotron jumbotron-fluid">
 33 |         <div class="container">
 34 |             <h1 class="display-4">Audio Demo</h1>
 35 |             <p class="content-title">S2VC: A Framework for Any-to-Any Voice Conversion with Self-SupervisedPretrained
 36 |                 Representations</p>
 37 |             <hr class="my-4">
 38 |             <p>
 39 |                 <b>Abstract:</b>
 40 |                 Any-to-any voice conversion (VC) aims to convert the timbre of utterances from and to any speakers seen
 41 |                 or unseen during training. Various any-to-any VC approaches have been proposed like AUTOVC, AdaINVC, and
 42 |                 FragmentVC. AUTOVC, and AdaINVC utilize source and target encoders to disentangle the content and
 43 |                 speaker information of the features. FragmentVC utilizes two encoders to encode source and target
 44 |                 information and adopts cross attention to align the source and target features with similar phonetic
 45 |                 content. Moreover, pre-trained features are adopted. AUTOVC used dvector to extract speaker information,
 46 |                 and self-supervised learning (SSL) features like wav2vec 2.0 is used in FragmentVC to extract the
 47 |                 phonetic content information. Different from previous works, we proposed S2VC that utilizes
 48 |                 Self-Supervised features as both source and target features for VC model. Supervised phoneme
 49 |                 posteriororgram (PPG), which is believed to be speaker-independent and widely used in VC to extract
 50 |                 content information, is chosen as a strong baseline for SSL features. The objective evaluation and
 51 |                 subjective evaluation both show models taking SSL feature CPC as both source and target features
 52 |                 outperforms that taking PPG as source feature, suggesting that SSL features have great potential in
 53 |                 improving VC.
 54 |             </p>
 55 |             <p>
 56 |                 <!-- <a href="https://github.com/yistLin/FragmentVC" type="button" class="btn btn-dark">GitHub (Source Code)</a> -->
 57 |                 <a href="https://arxiv.org/abs/2104.02901" type="button" class="btn btn-danger">arXiv (Preprint)</a>
 58 |             </p>
 59 |         </div>
 60 |     </div>
 61 | 
 62 |     <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/font-awesome/4.3.0/css/font-awesome.min.css">
 63 | 
 64 |     <script src="wavesurfer.js"></script>
 65 | 
 66 |     <div class="content-container">
 67 |         <div class="content-title">Samples</div>
 68 |         <div id='descript'>
 69 |         </div>
 70 |         <div id='legend'>
 71 |             <ul style="list-style-type:circle">
 72 |             </ul>
 73 |         </div>
 74 |         <div class="option-div">
 75 |             <label for="scenario">Choose a scenario:</label>
 76 |             <select id="scenario" onchange=record(this)>
 77 |                 <option value=seen>seen to seen</option>
 78 |                 <option value=unseen>unseen to unseen</option>
 79 |             </select>
 80 |         </div>
 81 | 
 82 |         <div class="option-div" id="task-div">
 83 |             <label for="task">Choose a Task:</label>
 84 |             <select id="task" onchange=record(this)>
 85 |                 <option value="m2m">Male to Male</option>
 86 |                 <option value="f2f">Female to Female</option>
 87 |                 <option value="m2f">Male to Female</option>
 88 |                 <option value="f2m">Female to Male</option>
 89 |             </select>
 90 |         </div>
 91 | 
 92 |         <div class="option-div" id="fix-div">
 93 |             <label for="fix">Fix the:</label>
 94 |             <select id="fix" onchange=record(this)>
 95 |                 <option value="src_feat">Source Feature</option>
 96 |                 <option value="tgt_feat">Target Feature</option>
 97 |             </select>
 98 |         </div>
 99 |         <div class="option-div" id="source-div">
100 |             <label for="source">Choose a feature:</label>
101 |             <select id="source" onchange=record(this)>
102 |                 <option value="Mel">Mel-spectrogram</option>
103 |                 <option value="PPG">Phone-posteriorgram</option>
104 |                 <option value="APC">Autoregressive-predictive-coding</option>
105 |                 <option value="CPC">Contrastive-predictive-coding</option>
106 |                 <option value="W2V">wav2vec 2.0</option>
107 |             </select>
108 |         </div>
109 |         <div id="result-div"></div>
110 |     </div>
111 | 
112 |     <div id="loading-status">Loading......</div>
113 | 
114 |     <script>
115 |         var wavesurfer_original_src;
116 |         var wavesurfer_original_tgt;
117 |         var wavesurfer_converted = [
118 |             1, 2, 3, 4, 5, 6
119 |         ];
120 |     </script>
121 | 
122 |     <script>
123 |         /* Highlights the navigation bar button corresponding to page
124 |         to indicate which page a user is currently viewing.
125 |         */
126 |         var scenario = "seen";
127 |         var fix = "src_feat";
128 |         var source = "CPC";
129 |         var task = "m2m";
130 | 
131 |         var feats = [
132 |             "Mel",
133 |             "PPG",
134 |             "APC",
135 |             "CPC",
136 |             "W2V",
137 |         ];
138 | 
139 |         var task_mapping = {
140 |             "m2m": "Male to Male",
141 |             "f2f": "Female to Female",
142 |             "m2f": "Male to Female",
143 |             "f2m": "Female to Male"
144 |         };
145 | 
146 |         var scenario_mapping = {
147 |             'seen': 'seen to seen',
148 |             'unseen': 'unseen to unseen',
149 |         }
150 | 
151 |         var speaker_name_mapping = {
152 |             'seen-m2m-source': "p258",
153 |             'seen-m2m-target': "p251",
154 |             'seen-f2f-source': "p239",
155 |             'seen-f2f-target': "p282",
156 |             'seen-m2f-source': "p278",
157 |             'seen-m2f-target': "p267",
158 |             'seen-f2m-source': "p300",
159 |             'seen-f2m-target': "p285",
160 |             'unseen-m2m-source': "aup",
161 |             'unseen-m2m-target': "ahw",
162 |             'unseen-f2f-source': "eey",
163 |             'unseen-f2f-target': "axb",
164 |             'unseen-m2f-source': "ahw",
165 |             'unseen-m2f-target': "clb",
166 |             'unseen-f2m-source': "slt",
167 |             'unseen-f2m-target': "aew",
168 | 
169 |         }
170 | 
171 | 
172 |         function lock() {
173 |             $('#secnario').attr('disabled', 'disabled');
174 |             $('#task').attr('disabled', 'disabled');
175 |             $('#fix').attr('disabled', 'disabled');
176 |             $('#source').attr('disabled', 'disabled');
177 |             $('#loading-status').show();
178 |         }
179 |         function unlock() {
180 |             $('#scenario').removeAttr('disabled');
181 |             $('#task').removeAttr('disabled');
182 |             $('#fix').removeAttr('disabled');
183 |             $('#source').removeAttr('disabled');
184 |             $('#loading-status').hide();
185 |         }
186 | 
187 |         function preload(scenario, source, push) {
188 |             lock();
189 |             $('#source').val(source);
190 |             $("#result-div").empty();
191 |             $("#result-div").append(createTable());
192 |             let footer = document.createElement('p');
193 |             footer.append('W2V: wav2vec 2.0');
194 |             $("#result-div").append(footer);
195 |             wavesurfer_original_src = WaveSurfer.create({
196 |                 container: '#source-uttr-div',
197 |                 waveColor: 'violet',
198 |                 progressColor: 'purple',
199 |             });
200 |             wavesurfer_original_src.load('./wavs/Ground-truth/' + scenario + '-' + task + '-source' + '.wav');
201 | 
202 |             wavesurfer_original_tgt = WaveSurfer.create({
203 |                 container: '#target-uttr-div',
204 |                 waveColor: 'violet',
205 |                 progressColor: 'purple',
206 |             });
207 | 
208 |             wavesurfer_original_tgt.load('./wavs/Ground-truth/' + scenario + '-' + task + '-target' + '.wav');
209 | 
210 |             for (var j = 0; j < feats.length; j++) {
211 |                 wavesurfer_converted[j] = WaveSurfer.create({
212 |                     container: '#converted-uttr-' + j + '-div',
213 |                     waveColor: 'violet',
214 |                     progressColor: 'purple',
215 |                 });
216 | 
217 |                 if (fix === 'src_feat')
218 |                     wavesurfer_converted[j].load("./wavs/" + source.toLowerCase() + "-" + feats[j].toLocaleLowerCase() + "/" + scenario + '-' + task + ".wav");
219 |                 else
220 |                     wavesurfer_converted[j].load("./wavs/" + feats[j].toLowerCase() + "-" + source.toLocaleLowerCase() + "/" + scenario + '-' + task + ".wav");
221 |             }
222 |             unlock();
223 |         }
224 | 
225 |         preload(scenario, source, true);
226 | 
227 |         function createTable() {
228 |             var tableElem;
229 |             tableElem = document.createElement('table');
230 |             return voice_conversion(tableElem);
231 |         }
232 | 
233 |         function voice_conversion(tableElem) {
234 |             var truthline, headline, rowElem, colElem;
235 | 
236 |             truthline = document.createElement('tr');
237 |             colElem = document.createElement('td');
238 |             colElem.appendChild(document.createTextNode('Source speaker:'));
239 |             colElem.appendChild(document.createElement('br'));
240 |             colElem.appendChild(document.createTextNode(speaker_name_mapping[scenario + '-' + task + '-source']))
241 |             colElem.style.color = 'rgb(22, 38, 67)';
242 |             colElem.style.center = true;
243 |             truthline.appendChild(colElem);
244 | 
245 |             colElem = document.createElement('div');
246 |             colElem.setAttribute("id", "source-uttr-div");
247 |             colElem.style.color = txt_color;
248 |             colElem.style.center = true;
249 |             truthline.appendChild(colElem);
250 | 
251 |             colElem = document.createElement('td');
252 |             var x = document.createElement("input");
253 |             x.setAttribute("type", "button");
254 |             x.setAttribute("value", "PLAY");
255 |             x.setAttribute("class", "play_button-demo btn btn-primary");
256 |             x.setAttribute("onclick", "wavesurfer_original_src.playPause()");
257 |             colElem.appendChild(x);
258 |             colElem.style.color = txt_color;
259 |             colElem.style.center = true;
260 |             truthline.appendChild(colElem);
261 | 
262 |             colElem = document.createElement('td');
263 |             colElem.appendChild(document.createTextNode('Target speaker: '));
264 |             colElem.appendChild(document.createElement('br'));
265 |             colElem.appendChild(document.createTextNode(speaker_name_mapping[scenario + '-' + task + '-target']))
266 |             colElem.style.color = 'rgb(22, 38, 67)';
267 |             colElem.style.center = true;
268 |             truthline.appendChild(colElem);
269 | 
270 |             colElem = document.createElement('div');
271 |             colElem.setAttribute("id", "target-uttr-div");
272 |             colElem.style.color = txt_color;
273 |             colElem.style.center = true;
274 |             truthline.appendChild(colElem);
275 | 
276 |             colElem = document.createElement('td');
277 |             var x = document.createElement("input");
278 |             x.setAttribute("type", "button");
279 |             x.setAttribute("value", "PLAY");
280 |             x.setAttribute("class", "play_button-demo btn btn-primary");
281 |             x.setAttribute("onclick", "wavesurfer_original_tgt.playPause()");
282 |             colElem.appendChild(x);
283 |             colElem.style.color = txt_color;
284 |             colElem.style.center = true;
285 |             truthline.appendChild(colElem);
286 | 
287 |             tableElem.appendChild(truthline);
288 | 
289 | 
290 |             headline = document.createElement('tr');
291 | 
292 |             colElem = document.createElement('td');
293 |             colElem.appendChild(document.createTextNode('Scenario'));
294 |             colElem.style.color = 'rgb(22, 38, 67)';
295 |             colElem.style.center = true;
296 |             headline.appendChild(colElem);
297 | 
298 |             colElem = document.createElement('td');
299 |             colElem.appendChild(document.createTextNode('Task'));
300 |             colElem.style.color = 'rgb(22, 38, 67)';
301 |             colElem.style.center = true;
302 |             colElem.style.whiteSpace = 'pre';
303 |             headline.appendChild(colElem);
304 | 
305 |             colElem = document.createElement('td');
306 |             if (fix === "src_feat")
307 |                 colElem.appendChild(document.createTextNode('Source Feature'));
308 |             else
309 |                 colElem.appendChild(document.createTextNode('Target Feature'));
310 |             colElem.style.color = 'rgb(22, 38, 67)';
311 |             colElem.style.center = true;
312 |             headline.appendChild(colElem);
313 | 
314 |             colElem = document.createElement('td');
315 |             if (fix === "src_feat")
316 |                 colElem.appendChild(document.createTextNode('Target Feature'));
317 |             else
318 |                 colElem.appendChild(document.createTextNode('Source Feature'));
319 |             colElem.style.color = 'rgb(22, 38, 67)';
320 |             colElem.style.center = true;
321 |             headline.appendChild(colElem);
322 | 
323 |             colElem = document.createElement('td');
324 |             colElem.appendChild(document.createTextNode('Converted result'));
325 |             colElem.style.color = 'rgb(22, 38, 67)';
326 |             colElem.style.center = true;
327 |             headline.appendChild(colElem);
328 | 
329 |             colElem = document.createElement('td');
330 |             colElem.style.color = 'rgb(22, 38, 67)';
331 |             colElem.style.center = true;
332 |             headline.appendChild(colElem);
333 | 
334 |             tableElem.appendChild(headline);
335 | 
336 |             var bg_color = 'white';
337 |             var txt_color = 'rgb(22, 38, 67)';
338 | 
339 |             var N = feats.length;
340 |             for (var j = 0; j < N; j++) {
341 |                 rowElem = document.createElement('tr');
342 |                 rowElem.style.backgroundColor = bg_color;
343 | 
344 |                 colElem = document.createElement('td');
345 |                 colElem.appendChild(document.createTextNode(scenario));
346 |                 colElem.style.color = txt_color;
347 |                 colElem.style.center = true;
348 |                 rowElem.appendChild(colElem);
349 | 
350 |                 colElem = document.createElement('td');
351 |                 colElem.appendChild(document.createTextNode(task_mapping[task]));
352 |                 colElem.style.color = txt_color;
353 |                 colElem.style.center = true;
354 |                 rowElem.appendChild(colElem);
355 | 
356 |                 colElem = document.createElement('td');
357 |                 colElem.appendChild(document.createTextNode(source));
358 |                 colElem.style.color = txt_color;
359 |                 colElem.style.center = true;
360 |                 rowElem.appendChild(colElem);
361 | 
362 |                 colElem = document.createElement('td');
363 |                 colElem.appendChild(document.createTextNode(feats[j]));
364 |                 colElem.style.color = txt_color;
365 |                 colElem.style.center = true;
366 |                 rowElem.appendChild(colElem);
367 | 
368 |                 colElem = document.createElement('div');
369 |                 colElem.setAttribute("id", 'converted-uttr-' + j + '-div');
370 |                 colElem.style.color = txt_color;
371 |                 colElem.style.center = true;
372 |                 rowElem.appendChild(colElem);
373 | 
374 |                 colElem = document.createElement('td');
375 |                 var x = document.createElement("input");
376 |                 x.setAttribute("type", "button");
377 |                 x.setAttribute("value", "PLAY");
378 |                 x.setAttribute("class", "play_button-demo btn btn-primary");
379 |                 x.setAttribute("onclick", 'wavesurfer_converted[' + j + '].playPause()');
380 |                 colElem.appendChild(x);
381 |                 colElem.style.color = txt_color;
382 |                 colElem.style.center = true;
383 |                 rowElem.appendChild(colElem);
384 | 
385 | 
386 |                 tableElem.appendChild(rowElem);
387 | 
388 |             }
389 | 
390 |             return tableElem;
391 |         }
392 |         function fetch_result() {
393 |             $(window).ready(function () {
394 |                 scenario = $("#scenario").val();
395 |                 source = $("#source").val();
396 |                 task = $("#task").val();
397 |                 fix = $("#fix").val();
398 |                 $("#result-div").empty();
399 |                 $("#fix").empty();
400 |                 $("#fix").append('<option value="src_feat">Source Feature</option>');
401 |                 $("#fix").append('<option value="tgt_feat">Target Feature</option>');
402 |                 $("#fix").val(fix);
403 | 
404 |                 $("#source").empty();
405 |                 $("#source").append('<option value="Mel">Mel-spectrogram</option>');
406 |                 $("#source").append('<option value="PPG">Phone-posteriorgram</option>');
407 |                 $("#source").append('<option value="APC">Autoregressive-predictive-coding</option>');
408 |                 $("#source").append('<option value="CPC">Contrastive-predictive-coding</option>');
409 |                 $("#source").append('<option value="W2V">wav2vec 2.0</option>');
410 |                 $("#source").val(source);
411 | 
412 |                 $("#task").empty();
413 |                 $("#task").append('<option value="m2m">Male to Male</option>');
414 |                 $("#task").append('<option value="f2f">Female to Female</option>');
415 |                 $("#task").append('<option value="m2f">Male to Female</option>');
416 |                 $("#task").append('<option value="f2m">Female to Male</option>');
417 |                 $("#task").val(task);
418 | 
419 |                 $("#result-div").append(createTable());
420 |                 let footer = document.createElement('p');
421 |                 footer.append('W2V: wav2vec 2.0');
422 |                 $("#result-div").append(footer);
423 | 
424 |                 wavesurfer_original_src = WaveSurfer.create({
425 |                     container: '#source-uttr-div',
426 |                     waveColor: 'violet',
427 |                     progressColor: 'purple',
428 |                 });
429 |                 wavesurfer_original_src.load('./wavs/Ground-truth/' + scenario + '-' + task + '-source' + '.wav');
430 | 
431 | 
432 |                 wavesurfer_original_tgt = WaveSurfer.create({
433 |                     container: '#target-uttr-div',
434 |                     waveColor: 'violet',
435 |                     progressColor: 'purple',
436 |                 });
437 |                 wavesurfer_original_tgt.load('./wavs/Ground-truth/' + scenario + '-' + task + '-target' + '.wav');
438 | 
439 |                 for (var j = 0; j < feats.length; j++) {
440 |                     wavesurfer_converted[j] = WaveSurfer.create({
441 |                         container: '#converted-uttr-' + j + '-div',
442 |                         waveColor: 'violet',
443 |                         progressColor: 'purple',
444 |                     });
445 |                     if (fix === 'src_feat')
446 |                         wavesurfer_converted[j].load("./wavs/" + source.toLowerCase() + "-" + feats[j].toLocaleLowerCase() + "/" + scenario + '-' + task + ".wav");
447 |                     else
448 |                         wavesurfer_converted[j].load("./wavs/" + feats[j].toLowerCase() + "-" + source.toLocaleLowerCase() + "/" + scenario + '-' + task + ".wav");
449 |                 }
450 |                 unlock();
451 |             });
452 |         }
453 | 
454 |         function record(sel) {
455 |             lock();
456 |             fetch_result();
457 |         }
458 | 
459 |         window.addEventListener('popstate', function (e) {
460 |             preload(e.state["scenario"], e.state["source"], false);
461 |         });
462 |     </script>
463 | 
464 | 
465 |     <div class="container" style="padding-top: 60px;">
466 |         <p class="text-center text-muted">&copy; 台大語音實驗室 NTU Speech Lab</p>
467 |     </div>
468 | </body>
469 | 
470 | </html>
471 | 


--------------------------------------------------------------------------------