├── .gitignore ├── LICENSE ├── README.md ├── assets ├── architecture.png └── results.png ├── baselines └── descript │ ├── README.md │ ├── conf │ ├── 16khz_dns_9k.yml │ ├── 16khz_dns_9k_tiny.yml │ └── descript_6k_final.yml │ ├── dac │ ├── __init__.py │ ├── __main__.py │ ├── compare │ │ ├── __init__.py │ │ └── encodec.py │ ├── model │ │ ├── __init__.py │ │ ├── base.py │ │ ├── dac.py │ │ └── discriminator.py │ ├── nn │ │ ├── __init__.py │ │ ├── layers.py │ │ ├── loss.py │ │ └── quantize.py │ └── utils │ │ ├── __init__.py │ │ ├── decode.py │ │ └── encode.py │ └── scripts │ ├── train_customize.py │ └── train_customize_no_adv.py ├── configs ├── 9kbps_esc_base.yaml ├── 9kbps_esc_base_adv.yaml ├── 9kbps_esc_large.yaml └── ablations │ ├── 9kbps_csvq_conv.yaml │ ├── 9kbps_csvq_swinT.yaml │ ├── 9kbps_rvq_conv.yaml │ └── 9kbps_rvq_swinT.yaml ├── esc ├── __init__.py ├── models │ ├── __init__.py │ ├── base.py │ ├── codecs.py │ ├── csrvq.py │ ├── discriminator.py │ └── utils.py └── modules │ ├── __init__.py │ ├── convolution │ └── layers.py │ ├── loss │ ├── __init__.py │ ├── gan_loss.py │ └── generator_loss.py │ ├── transformer │ ├── __init__.py │ ├── attention.py │ └── scale.py │ └── vq │ ├── __init__.py │ ├── codebook.py │ ├── initialize.py │ └── quantization.py ├── example.ipynb ├── main.py ├── requirements.txt ├── scripts ├── __init__.py ├── compress.py ├── metrics.py ├── test.py ├── trainer_adv.py ├── trainer_no_adv.py └── utils.py └── scripts_all.sh /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/README.md -------------------------------------------------------------------------------- /assets/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/assets/architecture.png -------------------------------------------------------------------------------- /assets/results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/assets/results.png -------------------------------------------------------------------------------- /baselines/descript/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/baselines/descript/README.md -------------------------------------------------------------------------------- /baselines/descript/conf/16khz_dns_9k.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/baselines/descript/conf/16khz_dns_9k.yml -------------------------------------------------------------------------------- /baselines/descript/conf/16khz_dns_9k_tiny.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/baselines/descript/conf/16khz_dns_9k_tiny.yml -------------------------------------------------------------------------------- /baselines/descript/conf/descript_6k_final.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/baselines/descript/conf/descript_6k_final.yml -------------------------------------------------------------------------------- /baselines/descript/dac/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/baselines/descript/dac/__init__.py -------------------------------------------------------------------------------- /baselines/descript/dac/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/baselines/descript/dac/__main__.py -------------------------------------------------------------------------------- /baselines/descript/dac/compare/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /baselines/descript/dac/compare/encodec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/baselines/descript/dac/compare/encodec.py -------------------------------------------------------------------------------- /baselines/descript/dac/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/baselines/descript/dac/model/__init__.py -------------------------------------------------------------------------------- /baselines/descript/dac/model/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/baselines/descript/dac/model/base.py -------------------------------------------------------------------------------- /baselines/descript/dac/model/dac.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/baselines/descript/dac/model/dac.py -------------------------------------------------------------------------------- /baselines/descript/dac/model/discriminator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/baselines/descript/dac/model/discriminator.py -------------------------------------------------------------------------------- /baselines/descript/dac/nn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/baselines/descript/dac/nn/__init__.py -------------------------------------------------------------------------------- /baselines/descript/dac/nn/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/baselines/descript/dac/nn/layers.py -------------------------------------------------------------------------------- /baselines/descript/dac/nn/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/baselines/descript/dac/nn/loss.py -------------------------------------------------------------------------------- /baselines/descript/dac/nn/quantize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/baselines/descript/dac/nn/quantize.py -------------------------------------------------------------------------------- /baselines/descript/dac/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/baselines/descript/dac/utils/__init__.py -------------------------------------------------------------------------------- /baselines/descript/dac/utils/decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/baselines/descript/dac/utils/decode.py -------------------------------------------------------------------------------- /baselines/descript/dac/utils/encode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/baselines/descript/dac/utils/encode.py -------------------------------------------------------------------------------- /baselines/descript/scripts/train_customize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/baselines/descript/scripts/train_customize.py -------------------------------------------------------------------------------- /baselines/descript/scripts/train_customize_no_adv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/baselines/descript/scripts/train_customize_no_adv.py -------------------------------------------------------------------------------- /configs/9kbps_esc_base.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/configs/9kbps_esc_base.yaml -------------------------------------------------------------------------------- /configs/9kbps_esc_base_adv.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/configs/9kbps_esc_base_adv.yaml -------------------------------------------------------------------------------- /configs/9kbps_esc_large.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/configs/9kbps_esc_large.yaml -------------------------------------------------------------------------------- /configs/ablations/9kbps_csvq_conv.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/configs/ablations/9kbps_csvq_conv.yaml -------------------------------------------------------------------------------- /configs/ablations/9kbps_csvq_swinT.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/configs/ablations/9kbps_csvq_swinT.yaml -------------------------------------------------------------------------------- /configs/ablations/9kbps_rvq_conv.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/configs/ablations/9kbps_rvq_conv.yaml -------------------------------------------------------------------------------- /configs/ablations/9kbps_rvq_swinT.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/configs/ablations/9kbps_rvq_swinT.yaml -------------------------------------------------------------------------------- /esc/__init__.py: -------------------------------------------------------------------------------- 1 | from .models import ESC, RVQCodecs -------------------------------------------------------------------------------- /esc/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/esc/models/__init__.py -------------------------------------------------------------------------------- /esc/models/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/esc/models/base.py -------------------------------------------------------------------------------- /esc/models/codecs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/esc/models/codecs.py -------------------------------------------------------------------------------- /esc/models/csrvq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/esc/models/csrvq.py -------------------------------------------------------------------------------- /esc/models/discriminator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/esc/models/discriminator.py -------------------------------------------------------------------------------- /esc/models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/esc/models/utils.py -------------------------------------------------------------------------------- /esc/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/esc/modules/__init__.py -------------------------------------------------------------------------------- /esc/modules/convolution/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/esc/modules/convolution/layers.py -------------------------------------------------------------------------------- /esc/modules/loss/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /esc/modules/loss/gan_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/esc/modules/loss/gan_loss.py -------------------------------------------------------------------------------- /esc/modules/loss/generator_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/esc/modules/loss/generator_loss.py -------------------------------------------------------------------------------- /esc/modules/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /esc/modules/transformer/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/esc/modules/transformer/attention.py -------------------------------------------------------------------------------- /esc/modules/transformer/scale.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/esc/modules/transformer/scale.py -------------------------------------------------------------------------------- /esc/modules/vq/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /esc/modules/vq/codebook.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/esc/modules/vq/codebook.py -------------------------------------------------------------------------------- /esc/modules/vq/initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/esc/modules/vq/initialize.py -------------------------------------------------------------------------------- /esc/modules/vq/quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/esc/modules/vq/quantization.py -------------------------------------------------------------------------------- /example.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/example.ipynb -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/main.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/compress.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/scripts/compress.py -------------------------------------------------------------------------------- /scripts/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/scripts/metrics.py -------------------------------------------------------------------------------- /scripts/test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/scripts/test.py -------------------------------------------------------------------------------- /scripts/trainer_adv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/scripts/trainer_adv.py -------------------------------------------------------------------------------- /scripts/trainer_no_adv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/scripts/trainer_no_adv.py -------------------------------------------------------------------------------- /scripts/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/scripts/utils.py -------------------------------------------------------------------------------- /scripts_all.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yzGuu830/efficient-speech-codec/HEAD/scripts_all.sh --------------------------------------------------------------------------------