├── LICENSE
├── README.md
├── egs
    ├── cycvae_mwdlp_vcc20
    │   ├── README.md
    │   ├── cmd.sh
    │   ├── conf
    │   │   ├── config.yml
    │   │   └── slurm.conf
    │   ├── demo_realtime
    │   │   ├── Makefile
    │   │   ├── demo_anasyn.sh
    │   │   ├── demo_interp.sh
    │   │   ├── demo_melsp.sh
    │   │   ├── demo_point.sh
    │   │   ├── inc
    │   │   │   ├── _kiss_fft_guts.h
    │   │   │   ├── arch.h
    │   │   │   ├── common.h
    │   │   │   ├── freq.h
    │   │   │   ├── kiss_fft.h
    │   │   │   ├── mwdlp10net_cycvae.h
    │   │   │   ├── mwdlp10net_cycvae_private.h
    │   │   │   ├── nnet.h
    │   │   │   ├── opus_types.h
    │   │   │   ├── tansig_table.h
    │   │   │   ├── vec.h
    │   │   │   ├── vec_avx.h
    │   │   │   ├── vec_neon.h
    │   │   │   └── wave.h
    │   │   └── src
    │   │   │   ├── freq.c
    │   │   │   ├── kiss_fft.c
    │   │   │   ├── mwdlp10net_cycvae.c
    │   │   │   ├── nnet.c
    │   │   │   ├── test_cycvae_mwdlp.c
    │   │   │   └── wave.c
    │   ├── demo_realtime_init
    │   │   ├── Makefile
    │   │   ├── demo_anasyn.sh
    │   │   ├── demo_interp.sh
    │   │   ├── demo_melsp.sh
    │   │   ├── demo_point.sh
    │   │   ├── inc
    │   │   │   ├── _kiss_fft_guts.h
    │   │   │   ├── arch.h
    │   │   │   ├── common.h
    │   │   │   ├── freq.h
    │   │   │   ├── kiss_fft.h
    │   │   │   ├── mwdlp10net_cycvae.h
    │   │   │   ├── mwdlp10net_cycvae_private.h
    │   │   │   ├── nnet.h
    │   │   │   ├── opus_types.h
    │   │   │   ├── tansig_table.h
    │   │   │   ├── vec.h
    │   │   │   ├── vec_avx.h
    │   │   │   ├── vec_neon.h
    │   │   │   └── wave.h
    │   │   └── src
    │   │   │   ├── freq.c
    │   │   │   ├── kiss_fft.c
    │   │   │   ├── mwdlp10net_cycvae.c
    │   │   │   ├── nnet.c
    │   │   │   └── wave.c
    │   ├── demo_realtime_mid
    │   │   ├── Makefile
    │   │   ├── demo_anasyn.sh
    │   │   ├── demo_interp.sh
    │   │   ├── demo_melsp.sh
    │   │   ├── demo_point.sh
    │   │   ├── inc
    │   │   │   ├── _kiss_fft_guts.h
    │   │   │   ├── arch.h
    │   │   │   ├── common.h
    │   │   │   ├── freq.h
    │   │   │   ├── kiss_fft.h
    │   │   │   ├── mwdlp10net_cycvae.h
    │   │   │   ├── mwdlp10net_cycvae_private.h
    │   │   │   ├── nnet.h
    │   │   │   ├── opus_types.h
    │   │   │   ├── tansig_table.h
    │   │   │   ├── vec.h
    │   │   │   ├── vec_avx.h
    │   │   │   ├── vec_neon.h
    │   │   │   └── wave.h
    │   │   └── src
    │   │   │   ├── freq.c
    │   │   │   ├── kiss_fft.c
    │   │   │   ├── mwdlp10net_cycvae.c
    │   │   │   ├── nnet.c
    │   │   │   └── wave.c
    │   ├── download_vcc20.sh
    │   ├── get_spk_list.sh
    │   ├── path.sh
    │   ├── run.sh
    │   ├── run_realtime.sh
    │   ├── run_realtime_init.sh
    │   └── run_realtime_mid.sh
    └── mwdlp_vcc20
    │   ├── README.md
    │   ├── cmd.sh
    │   ├── conf
    │       ├── config.yml
    │       └── slurm.conf
    │   ├── demo_realtime
    │       ├── Makefile
    │       ├── demo_anasyn.sh
    │       ├── demo_melsp.sh
    │       ├── inc
    │       │   ├── _kiss_fft_guts.h
    │       │   ├── arch.h
    │       │   ├── common.h
    │       │   ├── freq.h
    │       │   ├── kiss_fft.h
    │       │   ├── mwdlp10net.h
    │       │   ├── mwdlp10net_private.h
    │       │   ├── nnet.h
    │       │   ├── opus_types.h
    │       │   ├── tansig_table.h
    │       │   ├── vec.h
    │       │   ├── vec_avx.h
    │       │   ├── vec_neon.h
    │       │   └── wave.h
    │       └── src
    │       │   ├── freq.c
    │       │   ├── kiss_fft.c
    │       │   ├── mwdlp10net.c
    │       │   ├── nnet.c
    │       │   ├── test_mwdlp.c
    │       │   └── wave.c
    │   ├── download_vcc20.sh
    │   ├── get_spk_list.sh
    │   ├── path.sh
    │   ├── run.sh
    │   └── run_realtime.sh
├── src
    ├── bin
    │   ├── calc_stats.py
    │   ├── decode_gru-cycle-melspspkvae-gauss-smpl_ft_spk.py
    │   ├── decode_gru-cycle-melspxlf0capspkvae-gauss-smpl_spk.py
    │   ├── decode_spkidtr_map.py
    │   ├── decode_wavernn_dualgru_compact_lpc_mband_cf.py
    │   ├── dump_mwdlp-10b.py
    │   ├── dump_sparse-cyclevae_init_mwdlp-10b.py
    │   ├── dump_sparse-cyclevae_jnt_mwdlp-10b.py
    │   ├── f0_range.py
    │   ├── feature_extract.py
    │   ├── get_model_indices.py
    │   ├── gf_syn.py
    │   ├── min_pow.py
    │   ├── noise_shaping_emph.py
    │   ├── proc_wav_pqmf.py
    │   ├── sort_frame_list.py
    │   ├── spk_stat.py
    │   ├── train_nstages-sparse-wavernn_dualgru_compact_lpc_mband_10bit_cf_emb.py
    │   ├── train_nstages-sparse-wavernn_dualgru_compact_lpc_mband_10bit_cf_smpl_orgx_emb_v2.py
    │   ├── train_sparse-gru-cycle-melsp-spk-vae-ftdec-gauss-smpl_weightemb_mwdlp_smpl_v2.py
    │   ├── train_sparse-gru-cycle-melsp-spk-vae-gauss-smpl_weightemb_mwdlp_smpl_v2.py
    │   └── train_sparse-gru-cycle-melsp-x-lf0cap-spk-vae-gauss-smpl_weightemb_v2.py
    ├── nets
    │   ├── pqmf.py
    │   └── vcneuvoco.py
    └── utils
    │   ├── dataset.py
    │   ├── parse_options.sh
    │   ├── run.pl
    │   ├── slurm.pl
    │   └── utils.py
└── tools
    ├── Makefile
    └── requirements.txt


/README.md:
--------------------------------------------------------------------------------
 1 | # Low-latency real-time multispeaker voice conversion (VC) with cyclic variational autoencoder (CycleVAE) and multiband WaveRNN using data-driven linear prediction (MWDLP)
 2 | 
 3 | 
 4 | ## Requirements:
 5 | - UNIX
 6 | - 3.6 <= python <= 3.9
 7 | - CUDA 11.1
 8 | - virtualenv
 9 | - jq
10 | - make
11 | - gcc
12 | 
13 | 
14 | ## Installation
15 | ```
16 | $ cd tools
17 | $ make
18 | $ cd ..
19 | ```
20 | 
21 | 
22 | ## Latest version
23 | - 3.1 (2021/09/25)
24 |     - Finalize VC and MWDLP Python implementations (impl.)
25 |     - Bug fixes on C impl. to match the output of Python impl.
26 |     - Fix input segmental convolution impl. as in original papers while allowing usage in real-time demo
27 |     - Update MWDLP demo and samples with VCC20 dataset
28 |     - Update VC demo and samples with VCC20 dataset
29 | 
30 | 
31 | ## Compilable demo
32 | 
33 | - MWDLP: [demo_mwdlp-10bit_emb-v2_vcc20](https://drive.google.com/file/d/1hR7N-iCSUMNx9P-pDVxftGIIKLLyXsnt/view?usp=sharing)
34 | 
35 | - VC: [demo_sparse-cyclevae-weightembv2-smpl_jnt_mwdlp-10bit_emb_vcc20](https://drive.google.com/file/d/1LtuQmnUP45iWoREbPK0vBTdu2tDZKYeT/view?usp=sharing)
36 | 
37 | 
38 | ## Samples from compilable demo
39 | 
40 | - MWDLP: [samples_demo_mwdlp-10bit_emb-v2_vcc20](https://drive.google.com/drive/folders/1by_BO-fkeouDgTZBWEeu6EnzaX8UgHL8?usp=sharing)
41 | 
42 | - VC: [samples_demo_sparse-cyclevae-weightembv2-smpl_jnt_mwdlp-10bit_emb_vcc20](https://drive.google.com/drive/folders/1PanNaqsOccCImHECywzsaX6mFwausznz?usp=sharing)
43 | 
44 | 
45 | ## Steps to build the models:
46 | 1. Data preparation and preprocessing
47 | 2. VC and neural vocoder models training [~ 2.5 and 4 days, respectively]
48 | 3. VC fine-tuning with fixed neural vocoder [~ 2.5 days]
49 | 4. VC decoder fine-tuning with fixed encoder and neural vocoder [~ 2.5 days]
50 | 
51 | 
52 | ## Steps for real-time low-latency decoding with CPU:
53 | 1. Dump and compile models
54 | 2. Decode
55 | 
56 | Real-time implementation is based on [LPCNet](https://github.com/mozilla/LPCNet/).
57 | 
58 | 
59 | ## Details
60 | 
61 | Please see **egs/cycvae_mwdlp_vcc20/README.md** for more details on VC + neural vocoder
62 | 
63 | or
64 | 
65 | **egs/mwdlp_vcc20/README.md** for more details on neural vocoder only.
66 | 
67 | 
68 | ## References
69 | 
70 | [1] [High-Fidelity and Low-Latency Universal Neural Vocoder based on Multiband WaveRNN with Data-Driven Linear Prediction for Discrete Waveform Modeling](https://arxiv.org/abs/2105.09856)
71 | 
72 | [2] [Low-latency real-time non-parallel voice conversion based on cyclic variational autoencoder and multiband WaveRNN with data-driven linear prediction](https://arxiv.org/pdf/2105.09858.pdf)
73 | 
74 | 
75 | ## Contact
76 | 
77 | Patrick Lumban Tobing
78 | 
79 | patrickltobing@gmail.com
80 | 
81 | patrick.lumbantobing@g.sp.m.is.nagoya-u.ac.jp
82 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/README.md:
--------------------------------------------------------------------------------
 1 | # Real-time low-latency multispeaker VC with cyclic variational autoencoder (CycleVAE) and multiband WaveRNN using data-driven linear prediction (MWDLP)
 2 | 
 3 | 
 4 | This package uses Voice Conversion Challenge 2020 dataset [VCC20](http://vc-challenge.org/).
 5 | 
 6 | Real-time implementation is based on [LPCNet](https://github.com/mozilla/LPCNet/).
 7 | 
 8 | 
 9 | ## Data preparation
10 | ```
11 | $ bash download_vcc20.sh
12 | ```
13 | 
14 | ## Data preprocessing
15 | 1. Open **run.sh**
16 | 2. Set `stage=0init123`
17 | 3. Set a value of `n_jobs=` for number of parallel threads in preprocessing
18 | 4. `$ bash run.sh`
19 | 
20 | 
21 | ## VC model training [~ 2.5 days]
22 | 1. Open **run.sh**
23 | 2. Set `stage=4`
24 | 3. Set a value of `GPU_device=` for GPU device selection
25 | 4. `$ bash run.sh`
26 | 
27 | 
28 | ## Neural vocoder training [~ 4 days]
29 | 1. Open **run.sh**
30 | 2. Set `stage=5`
31 | 3. Set a value of `GPU_device=` for GPU device selection
32 | 4. `$ bash run.sh`
33 | 
34 | 
35 | ## VC fine-tuning with fixed neural vocoder [~ 2.5 days]
36 | 1. Open **run.sh**
37 | 2. Set `stage=6`
38 | 3. Set a value of `GPU_device=` for GPU device selection
39 | 4. `$ bash run.sh`
40 | 
41 | 
42 | ## VC decoder fine-tuning with fixed encoder and neural vocoder [~ 2.5 days]
43 | 1. Open **run.sh**
44 | 2. Set `stage=7`
45 | 3. Set a value of `GPU_device=` for GPU device selection
46 | 4. `$ bash run.sh`
47 | 
48 | 
49 | ## Compile CPU real-time program
50 | 1. Open **run_realtime.sh**
51 | 2. Set `stage=0`
52 | 3. `$ bash run_realtime.sh`
53 | 
54 | 
55 | ## Decode with target speaker points
56 | 1. Open **run_realtime.sh**
57 | 2. Set `stage=3`
58 | 3. Set values in `spks_src_dec=` for source speakers
59 | 4. Set values in `spks_trg_dec=` for target speakers
60 | 5. `$ bash run_realtime.sh`
61 | 
62 | 
63 | ## Decode with interpolated speaker points
64 | 1. Open **run_realtime.sh**
65 | 2. Set `stage=4`
66 | 3. Set values in `spks_src_dec=` for source speakers
67 | 4. Set a value in `n_interp=` for number of interpolated points
68 | 5. `$ bash run_realtime.sh`
69 | 
70 | 
71 | ## Contact
72 | 
73 | Patrick Lumban Tobing
74 | 
75 | patrickltobing@gmail.com
76 | 
77 | patrick.lumbantobing@g.sp.m.is.nagoya-u.ac.jp
78 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/cmd.sh:
--------------------------------------------------------------------------------
 1 | # you can change cmd.sh depending on what type of queue you are using.
 2 | # If you have no queueing system and want to run on a local machine, you
 3 | # can change all instances 'queue.pl' to run.pl (but be careful and run
 4 | # commands one by one: most recipes will exhaust the memory on your
 5 | # machine).  queue.pl works with GridEngine (qsub).  slurm.pl works
 6 | # with slurm.  Different queues are configured differently, with different
 7 | # queue names and different ways of specifying things like memory;
 8 | # to account for these differences you can create and edit the file
 9 | # conf/queue.conf to match your queue's configuration.  Search for
10 | # conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
11 | # or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
12 | 
13 | # for local
14 | export train_cmd="run.pl"
15 | export cuda_cmd="run.pl --gpu 1"
16 | export max_jobs=1
17 | 
18 | # for slurm (you can change configuration file "conf/slurm.conf")
19 | # export train_cmd="slurm.pl --config conf/slurm.conf"
20 | # export cuda_cmd="slurm.pl --hpc_gpu 1 --config conf/slurm.conf"
21 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/conf/config.yml:
--------------------------------------------------------------------------------
  1 | ## Sampling rate
  2 | #fs: 8000
  3 | #fs: 16000
  4 | #fs: 22050
  5 | fs: 24000
  6 | #fs: 44100
  7 | #fs: 48000
  8 | 
  9 | ## Frame-shift mel-cep/mel-spec extraction
 10 | shiftms: 10
 11 | 
 12 | ## Window length mel-spectrogram extraction
 13 | winms: 27.5
 14 | 
 15 | ## DC-component removal
 16 | highpass_cutoff: 65
 17 | 
 18 | ## Mel-cepstrum
 19 | mcep_dim: 49
 20 | 
 21 | ## Mel-spectrogram
 22 | mel_dim: 80
 23 | 
 24 | ## Pre-emphasis noise-shaping coefficient
 25 | alpha: 0.85
 26 | 
 27 | ## GRU hidden units encoder, decoder spec, decoder excit
 28 | hidden_units_enc: 512
 29 | hidden_units_dec: 640
 30 | 
 31 | hidden_units_lf0: 128
 32 | 
 33 | ## GRU hidden units wavernn
 34 | ## for maximum performance, use 1184; for feasible usage use 1024
 35 | hidden_units_wave: 1024
 36 | #hidden_units_wave: 1184
 37 | 
 38 | hidden_units_wave_2: 32
 39 | 
 40 | ## Output dimension of FC layer before GRU for wavernn
 41 | s_dim: 320
 42 | 
 43 | ## Output dimension of DualFC before final FC layer
 44 | mid_dim: 16
 45 | 
 46 | ## GRU hidden layers encoder, decoder spec, decoder excit
 47 | hidden_layers_enc: 1
 48 | hidden_layers_dec: 1
 49 | hidden_layers_lf0: 1
 50 | 
 51 | ## GRU hidden layers wavernn
 52 | hidden_layers_wave: 1
 53 | 
 54 | ## kernel-size input conv encoder
 55 | kernel_size_enc: 5
 56 | dilation_size_enc: 1
 57 | 
 58 | ## kernel-size input conv decoder spec
 59 | kernel_size_dec: 5
 60 | dilation_size_dec: 1
 61 | 
 62 | ## kernel-size input conv decoder excit
 63 | kernel_size_lf0: 7
 64 | dilation_size_lf0: 1
 65 | 
 66 | ## kernel-size input conv wavernn
 67 | kernel_size_wave: 7
 68 | dilation_size_wave: 1
 69 | 
 70 | right_size_dec: 0
 71 | right_size_lf0: 0
 72 | 
 73 | ## use dense relu after segmental convolution
 74 | #s_conv_flag: false
 75 | s_conv_flag: true
 76 | 
 77 | ## use segmental convolution
 78 | #seg_conv_flag: false
 79 | seg_conv_flag: true
 80 | 
 81 | ## number of cycles :  n_half_cyc // 2
 82 | n_half_cyc: 2
 83 | 
 84 | ## spect. latent dim.
 85 | lat_dim: 96
 86 | 
 87 | ## excit. latent dim.
 88 | lat_dim_e: 32
 89 | 
 90 | ## use causal input convolution
 91 | ## if using skewed input convolution for encoder (right_size > 0), set causal_conv dec/lf0 to true
 92 | ## always use non-causal input conv. for encoder/wave
 93 | causal_conv_enc: false
 94 | causal_conv_dec: true
 95 | causal_conv_lf0: true
 96 | causal_conv_wave: false
 97 | ## if right_size > 0 (skewed conv encoder, i.e., future frame is limited), set causal_conv dec/lf0 to true [low-latency/real-time proc.]
 98 | 
 99 | ## use segmental convolution for wavernn
100 | #seg_conv_flag_wave: false
101 | seg_conv_flag_wave: true
102 | 
103 | ## use dim-reduction for cont. spk. embed from 1-hot code
104 | #spkidtr_dim: 0
105 | spkidtr_dim: 2
106 | ## to interpolate between 2-dim cont. spk-space
107 | 
108 | ## number of factorization for speaker embedding
109 | n_weight_emb: 4
110 | 
111 | ## learning rate
112 | lr: 1e-4
113 | 
114 | ## dropout rate
115 | do_prob: 0.5
116 | 
117 | ## maximum step count
118 | #step_count: 20
119 | step_count: 1035000
120 | #step_count_wave: 20
121 | step_count_wave: 4350000
122 | 
123 | # number of workers (threads) for batch data handling
124 | n_workers: 1
125 | #n_workers: 2
126 | 
127 | mdl_name_vc: cycmelspxlf0capspkvae-gauss-smpl_sparse_weightemb_v2
128 | mdl_name_ft: cycmelspspkvae-gauss-smpl_sparse_weightemb_mwdlp_smpl_v2
129 | mdl_name_sp: cycmelspspkvae-ftdec-gauss-smpl_sparse_wemb_mwdlp_smpl_v2
130 | 
131 | mdl_name_wave: wavernn_dualgru_compact_lpc_mband_10bit_cf_stft_emb_v2
132 | 
133 | ## sparsification scheduling settings for cyclevae
134 | #t_start_cycvae: 1
135 | t_start_cycvae: 5000
136 | #t_end_cycvae: 20
137 | t_end_cycvae: 245000
138 | #interval_cycvae: 1
139 | interval_cycvae: 10
140 | densities_cycvae_enc: 0.685-0.685-0.88
141 | densities_cycvae_dec: 0.685-0.685-0.88
142 | n_stage_cycvae: 4
143 | 
144 | ## sparsification scheduling settings for wavernn
145 | #t_start: 1
146 | t_start: 20000
147 | #t_end: 20
148 | t_end: 1070000
149 | #interval: 1
150 | interval: 20
151 | densities: 0.09-0.09-0.12
152 | n_stage: 4
153 | 
154 | ## number of data-driven linear predictive coefficients (LPC) in wavernn
155 | #lpc: 0
156 | lpc: 8
157 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/conf/slurm.conf:
--------------------------------------------------------------------------------
 1 | command sbatch --export=PATH  --ntasks-per-node=1
 2 | option time=* --time $0
 3 | option mem=* --mem-per-cpu $0
 4 | option mem=0
 5 | option num_threads=* --cpus-per-task $0 --ntasks-per-node=1
 6 | option num_threads=1 --cpus-per-task 1  --ntasks-per-node=1
 7 | default gpu=0
 8 | option gpu=0 -p all,hpc
 9 | option hpc_gpu=* -p hpc --gres=gpu:$0 --time 10-00:00:00
10 | option all_gpu=* -p all,hpc --gres=gpu:$0 --time 10-00:00:00
11 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime/Makefile:
--------------------------------------------------------------------------------
 1 | # Builds the static library lib/libmwdlp10cycvae.a from the C sources in src/
 2 | # and links the demo program bin/test_cycvae_mwdlp against it.
 3 | LDIR = lib
 4 | $(shell mkdir -p $(LDIR))
 5 | LIBNAME = mwdlp10cycvae
 6 | OUT = ${LDIR}/lib${LIBNAME}.a
 7 | 
 8 | CC = gcc
 9 | CFLAGS = -mavx2 -mfma -g -O3 -Wall -W -Wextra -fpic
10 | LFLAGS = -lm
11 | 
12 | ODIR = obj
13 | $(shell mkdir -p $(ODIR))
14 | 
15 | SDIR = src
16 | IDIR = inc
17 | BDIR = bin
18 | $(shell mkdir -p $(BDIR))
19 | 
20 | INC = -I${IDIR}
21 | LIB = -L${LDIR} -l${LIBNAME}
22 | TARGET = test_cycvae_mwdlp
23 | 
24 | # NOTE(review): nnet_data.c and nnet_cv_data.c are expected in $(SDIR) but are
25 | # not part of the checked-in sources -- presumably produced by the model dump step.
26 | _OBJS = nnet.o mwdlp10net_cycvae.o kiss_fft.o freq.o wave.o nnet_data.o nnet_cv_data.o
27 | OBJS = $(patsubst %,$(ODIR)/%,$(_OBJS))
28 | 
29 | .PHONY: all clean
30 | 
31 | # Default goal: the demo executable (which in turn requires the static library).
32 | all: ${BDIR}/${TARGET}
33 | 
34 | # Link the demo program; relinked only when its source or the library changes.
35 | ${BDIR}/${TARGET}: ${SDIR}/${TARGET}.c ${OUT}
36 | 	$(CC) $(CFLAGS) $< $(INC) ${LIB} ${LFLAGS} -o $@
37 | 
38 | # Archive all objects into the static library.
39 | $(OUT): $(OBJS)
40 | 	ar rvs $(OUT) $^
41 | 
42 | # Compile only: linker libraries (LFLAGS) are not needed for a -c step.
43 | $(ODIR)/%.o: $(SDIR)/%.c
44 | 	$(CC) $(CFLAGS) $(INC) -c -o $@ $<
45 | 
46 | clean:
47 | 	rm -f $(ODIR)/*.o $(OUT) ${BDIR}/${TARGET}
48 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime/demo_anasyn.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Analysis-synthesis demo: runs the demo binary on every .wav file found in
 4 | # ${in_dir} and writes the result under the same file name in ${out_dir}.
 5 | #
 6 | # NOTE(review): this script calls ./bin/test_mwdlp, but the Makefile in this
 7 | # directory builds bin/test_cycvae_mwdlp -- confirm which binary is intended.
 8 | 
 9 | #in_dir=wav_8kHz
10 | #in_dir=wav_16kHz
11 | in_dir=wav_24kHz
12 | #out_dir=wav_anasyn_8kHz
13 | #out_dir=wav_anasyn_16kHz
14 | out_dir=wav_anasyn_24kHz
15 | 
16 | mkdir -p "$out_dir"
17 | 
18 | # Iterate over the glob directly instead of parsing `ls` output through a
19 | # temporary list file: paths with whitespace survive, and the binary no longer
20 | # inherits the list file as its standard input.
21 | for line in "${in_dir}"/*.wav; do
22 |     # An unmatched glob stays literal; skip it when no .wav file exists.
23 |     [ -e "$line" ] || continue
24 |     name=$(basename "$line")
25 |     echo "$line" "$name"
26 |     ./bin/test_mwdlp "$line" "${out_dir}/$name"
27 |     #./bin/test_mwdlp.exe "$line" "${out_dir}/$name"
28 | done
29 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime/demo_interp.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Speaker-interpolation demo: converts one source utterance once for every
 4 | # (x, y) pair taken from the two coordinate lists below, passing the pair to
 5 | # the demo binary through its -c option.
 6 | #
 7 | # NOTE(review): x_coords has no -0.35 entry and stops at -0.40, while y_coords
 8 | # runs in even 0.05 steps down to -0.45 -- confirm the gap is intentional.
 9 | # NOTE(review): 001 / p326 does not follow the <id>_<VCC20 speaker> naming used
10 | # by demo_point.sh -- confirm the input file exists for this recipe.
11 | 
12 | x_coords=(0.00 -0.05 -0.10 -0.15 -0.20 -0.25 -0.30 -0.40)
13 | y_coords=(0.00 -0.05 -0.10 -0.15 -0.20 -0.25 -0.30 -0.35 -0.40 -0.45)
14 | 
15 | file_idx=001
16 | src_spk=p326
17 | 
18 | in_dir=wav
19 | out_dir=wav_cv_interp
20 | 
21 | mkdir -p "$out_dir"
22 | 
23 | # The same source waveform is reused for every coordinate pair.
24 | in_wav="${in_dir}/${file_idx}_${src_spk}.wav"
25 | 
26 | for x in "${x_coords[@]}"; do
27 |     for y in "${y_coords[@]}"; do
28 |         echo "$file_idx $src_spk to $x $y"
29 |         ./bin/test_cycvae_mwdlp -c "$x" "$y" "$in_wav" "${out_dir}/${file_idx}_${src_spk}-interpolate_${x}_${y}.wav"
30 |         #./bin/test_cycvae_mwdlp.exe -c "$x" "$y" "$in_wav" "${out_dir}/${file_idx}_${src_spk}-interpolate_${x}_${y}.wav"
31 |     done
32 | done
33 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime/demo_melsp.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Mel-spectrogram demo for a single utterance: runs the demo binary three
 4 | # times (-o, -b, -t) and writes all outputs into ${out_dir}.
 5 | #
 6 | # NOTE(review): option meanings are inferred from the argument file names only
 7 | # (-o: also write <name>_melsp.bin / <name>_melsp.txt, -b: synthesize from the
 8 | # .bin file, -t: synthesize from the .txt file) -- confirm against the binary.
 9 | # NOTE(review): this script calls ./bin/test_mwdlp, but the Makefile in this
10 | # directory builds bin/test_cycvae_mwdlp -- confirm which binary is intended.
11 | 
12 | in_dir=wav
13 | out_dir=wav_melsp
14 | 
15 | mkdir -p "$out_dir"
16 | 
17 | # Batch variant over every file in ${in_dir}, kept disabled as in the original:
18 | #ls ${in_dir}/*.wav > tmp.list
19 | #
20 | #while read line;do
21 | #    name=`basename $line .wav`
22 | #    echo $line $name
23 | #    ./bin/test_mwdlp -o melsp.bin melsp.txt $line ${out_dir}/${name}_anasyn.wav
24 | #    ./bin/test_mwdlp -b melsp.bin ${out_dir}/${name}_binsyn.wav
25 | #    ./bin/test_mwdlp -t melsp.txt ${out_dir}/${name}_txtsyn.wav
26 | #    ./bin/test_mwdlp.exe -o melsp.bin melsp.txt $line ${out_dir}/${name}_anasyn.wav
27 | #    ./bin/test_mwdlp.exe -b melsp.bin ${out_dir}/${name}_binsyn.wav
28 | #    ./bin/test_mwdlp.exe -t melsp.txt ${out_dir}/${name}_txtsyn.wav
29 | #done < tmp.list
30 | #
31 | #rm -f tmp.list
32 | 
33 | line="${in_dir}/001_p326.wav"
34 | name=$(basename "$line" .wav)
35 | 
36 | # All intermediate and output files share this path prefix.
37 | out_base="${out_dir}/${name}"
38 | 
39 | ./bin/test_mwdlp -o "${out_base}_melsp.bin" "${out_base}_melsp.txt" "$line" "${out_base}_anasyn.wav"
40 | ./bin/test_mwdlp -b "${out_base}_melsp.bin" "${out_base}_binsyn.wav"
41 | ./bin/test_mwdlp -t "${out_base}_melsp.txt" "${out_base}_txtsyn.wav"
42 | #./bin/test_mwdlp.exe -o "${out_base}_melsp.bin" "${out_base}_melsp.txt" "$line" "${out_base}_anasyn.wav"
43 | #./bin/test_mwdlp.exe -b "${out_base}_melsp.bin" "${out_base}_binsyn.wav"
44 | #./bin/test_mwdlp.exe -t "${out_base}_melsp.txt" "${out_base}_txtsyn.wav"
45 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime/demo_point.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Target-speaker-point demo: converts one source utterance to every speaker
 4 | # listed in ${spks}, passing the speaker's 1-based position in that list to
 5 | # the demo binary through its -i option.
 6 | 
 7 | spks=(SEF1 SEF2 SEM1 SEM2 TFM1 TGM1 TMM1 TEF1 TEM1 TEF2 TEM2 TFF1 TGF1 TMF1)
 8 | 
 9 | file_idx=E10061
10 | src_spk=SEF2
11 | 
12 | in_dir=wav
13 | out_dir=wav_cv_point
14 | 
15 | mkdir -p "$out_dir"
16 | 
17 | # The same source waveform is reused for every target speaker.
18 | in_wav="${in_dir}/${file_idx}_${src_spk}.wav"
19 | 
20 | spk_idx=1
21 | 
22 | for spk in "${spks[@]}"; do
23 |     echo "$file_idx $src_spk to $spk $spk_idx"
24 |     ./bin/test_cycvae_mwdlp -i "$spk_idx" "$in_wav" "${out_dir}/${spk_idx}_${file_idx}_${src_spk}-${spk}.wav"
25 |     #./bin/test_cycvae_mwdlp.exe -i "$spk_idx" "$in_wav" "${out_dir}/${spk_idx}_${file_idx}_${src_spk}-${spk}.wav"
26 |     spk_idx=$(( spk_idx + 1 ))
27 | done
28 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime/inc/_kiss_fft_guts.h:
--------------------------------------------------------------------------------
  1 | /*Copyright (c) 2003-2004, Mark Borgerding
  2 | 
  3 |   All rights reserved.
  4 | 
  5 |   Redistribution and use in source and binary forms, with or without
  6 |    modification, are permitted provided that the following conditions are met:
  7 | 
  8 |     * Redistributions of source code must retain the above copyright notice,
  9 |        this list of conditions and the following disclaimer.
 10 |     * Redistributions in binary form must reproduce the above copyright notice,
 11 |        this list of conditions and the following disclaimer in the
 12 |        documentation and/or other materials provided with the distribution.
 13 | 
 14 |   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 15 |   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 16 |   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 17 |   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 18 |   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 19 |   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 20 |   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 21 |   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 22 |   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 23 |   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 24 |   POSSIBILITY OF SUCH DAMAGE.*/
 25 | 
 26 | #ifndef KISS_FFT_GUTS_H
 27 | #define KISS_FFT_GUTS_H
 28 | 
 29 | #define MIN(a,b) ((a)<(b) ? (a):(b))
 30 | #define MAX(a,b) ((a)>(b) ? (a):(b))
 31 | 
 32 | /* kiss_fft.h
 33 |    defines kiss_fft_scalar as either short or a float type
 34 |    and defines
 35 |    typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */
 36 | #include "kiss_fft.h"
 37 | 
 38 | /*
 39 |   Explanation of macros dealing with complex math:
 40 | 
 41 |    C_MUL(m,a,b)         : m = a*b
 42 |    C_FIXDIV( c , div )  : if a fixed point impl., c /= div. noop otherwise
 43 |    C_SUB( res, a,b)     : res = a - b
 44 |    C_SUBFROM( res , a)  : res -= a
 45 |    C_ADDTO( res , a)    : res += a
 46 |  * */
 47 | #ifdef FIXED_POINT
 48 | #include "arch.h"
 49 | 
 50 | 
 51 | #define SAMP_MAX 2147483647
 52 | #define TWID_MAX 32767
 53 | #define TRIG_UPSCALE 1
 54 | 
 55 | #define SAMP_MIN -SAMP_MAX
 56 | 
 57 | 
 58 | #   define S_MUL(a,b) MULT16_32_Q15(b, a)
 59 | 
 60 | #   define C_MUL(m,a,b) \
 61 |       do{ (m).r = SUB32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
 62 |           (m).i = ADD32_ovflw(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0)
 63 | 
 64 | #   define C_MULC(m,a,b) \
 65 |       do{ (m).r = ADD32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
 66 |           (m).i = SUB32_ovflw(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)
 67 | 
 68 | #   define C_MULBYSCALAR( c, s ) \
 69 |       do{ (c).r =  S_MUL( (c).r , s ) ;\
 70 |           (c).i =  S_MUL( (c).i , s ) ; }while(0)
 71 | 
 72 | #   define DIVSCALAR(x,k) \
 73 |         (x) = S_MUL(  x, (TWID_MAX-((k)>>1))/(k)+1 )
 74 | 
 75 | #   define C_FIXDIV(c,div) \
 76 |         do {    DIVSCALAR( (c).r , div);  \
 77 |                 DIVSCALAR( (c).i  , div); }while (0)
 78 | 
 79 | #define  C_ADD( res, a,b)\
 80 |     do {(res).r=ADD32_ovflw((a).r,(b).r);  (res).i=ADD32_ovflw((a).i,(b).i); \
 81 |     }while(0)
 82 | #define  C_SUB( res, a,b)\
 83 |     do {(res).r=SUB32_ovflw((a).r,(b).r);  (res).i=SUB32_ovflw((a).i,(b).i); \
 84 |     }while(0)
 85 | #define C_ADDTO( res , a)\
 86 |     do {(res).r = ADD32_ovflw((res).r, (a).r);  (res).i = ADD32_ovflw((res).i,(a).i);\
 87 |     }while(0)
 88 | 
 89 | #define C_SUBFROM( res , a) /* res -= a: both real and imaginary parts are subtracted */ \
 90 |     do {(res).r = SUB32_ovflw((res).r,(a).r);  (res).i = SUB32_ovflw((res).i,(a).i); \
 91 |     }while(0)
 92 | 
 93 | #if defined(OPUS_ARM_INLINE_ASM)
 94 | #include "arm/kiss_fft_armv4.h"
 95 | #endif
 96 | 
 97 | #if defined(OPUS_ARM_INLINE_EDSP)
 98 | #include "arm/kiss_fft_armv5e.h"
 99 | #endif
100 | #if defined(MIPSr1_ASM)
101 | #include "mips/kiss_fft_mipsr1.h"
102 | #endif
103 | 
104 | #else  /* not FIXED_POINT*/
105 | 
106 | #   define S_MUL(a,b) ( (a)*(b) )
107 | #define C_MUL(m,a,b) \
108 |     do{ (m).r = (a).r*(b).r - (a).i*(b).i;\
109 |         (m).i = (a).r*(b).i + (a).i*(b).r; }while(0)
110 | #define C_MULC(m,a,b) \
111 |     do{ (m).r = (a).r*(b).r + (a).i*(b).i;\
112 |         (m).i = (a).i*(b).r - (a).r*(b).i; }while(0)
113 | 
114 | #define C_MUL4(m,a,b) C_MUL(m,a,b)
115 | 
116 | #   define C_FIXDIV(c,div) /* NOOP */
117 | #   define C_MULBYSCALAR( c, s ) \
118 |     do{ (c).r *= (s);\
119 |         (c).i *= (s); }while(0)
120 | #endif
121 | 
122 | #ifndef CHECK_OVERFLOW_OP
123 | #  define CHECK_OVERFLOW_OP(a,op,b) /* noop */
124 | #endif
125 | 
126 | #ifndef C_ADD
127 | #define  C_ADD( res, a,b)\
128 |     do { \
129 |             CHECK_OVERFLOW_OP((a).r,+,(b).r)\
130 |             CHECK_OVERFLOW_OP((a).i,+,(b).i)\
131 |             (res).r=(a).r+(b).r;  (res).i=(a).i+(b).i; \
132 |     }while(0)
133 | #define  C_SUB( res, a,b)\
134 |     do { \
135 |             CHECK_OVERFLOW_OP((a).r,-,(b).r)\
136 |             CHECK_OVERFLOW_OP((a).i,-,(b).i)\
137 |             (res).r=(a).r-(b).r;  (res).i=(a).i-(b).i; \
138 |     }while(0)
139 | #define C_ADDTO( res , a)\
140 |     do { \
141 |             CHECK_OVERFLOW_OP((res).r,+,(a).r)\
142 |             CHECK_OVERFLOW_OP((res).i,+,(a).i)\
143 |             (res).r += (a).r;  (res).i += (a).i;\
144 |     }while(0)
145 | 
146 | #define C_SUBFROM( res , a)\
147 |     do {\
148 |             CHECK_OVERFLOW_OP((res).r,-,(a).r)\
149 |             CHECK_OVERFLOW_OP((res).i,-,(a).i)\
150 |             (res).r -= (a).r;  (res).i -= (a).i; \
151 |     }while(0)
152 | #endif /* C_ADD defined */
153 | 
154 | #ifdef FIXED_POINT
155 | /*#  define KISS_FFT_COS(phase)  TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * cos (phase))))
156 | #  define KISS_FFT_SIN(phase)  TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * sin (phase))))*/
157 | #  define KISS_FFT_COS(phase)  floor(.5+TWID_MAX*cos (phase))
158 | #  define KISS_FFT_SIN(phase)  floor(.5+TWID_MAX*sin (phase))
159 | #  define HALF_OF(x) ((x)>>1)
160 | #elif defined(USE_SIMD)
161 | #  define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) )
162 | #  define KISS_FFT_SIN(phase) _mm_set1_ps( sin(phase) )
163 | #  define HALF_OF(x) ((x)*_mm_set1_ps(.5f))
164 | #else
165 | #  define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase)
166 | #  define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase)
167 | #  define HALF_OF(x) ((x)*.5f)
168 | #endif
169 | 
170 | #define  kf_cexp(x,phase) \
171 |         do{ \
172 |                 (x)->r = KISS_FFT_COS(phase);\
173 |                 (x)->i = KISS_FFT_SIN(phase);\
174 |         }while(0)
175 | 
176 | #define  kf_cexp2(x,phase) \
177 |    do{ \
178 |       (x)->r = TRIG_UPSCALE*celt_cos_norm((phase));\
179 |       (x)->i = TRIG_UPSCALE*celt_cos_norm((phase)-32768);\
180 | }while(0)
181 | 
182 | #endif /* KISS_FFT_GUTS_H */
183 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime/inc/common.h:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #ifndef COMMON_H
 4 | #define COMMON_H
 5 | 
 6 | #include <stdlib.h>
 7 | #include <string.h>
 8 | #include <math.h>
 9 | 
10 | #define RNN_INLINE inline
11 | #define OPUS_INLINE inline
12 | 
13 | #define LOG256 5.5451774445f
14 | static RNN_INLINE float log2_approx(float x) /* fast approximate log2(x); assumes 32-bit int and IEEE-754 single-precision float, x > 0 */
15 | {
16 |    int integer;
17 |    float frac;
18 |    union {
19 |       float f;
20 |       int i;
21 |    } in;
22 |    in.f = x; /* reinterpret the float's bit pattern as an integer through the union */
23 |    integer = (in.i>>23)-127; /* exponent field minus the bias 127 */
24 |    in.i = (int)((unsigned)in.i - ((unsigned)integer<<23)); /* same bits as in.i -= integer<<23, but in unsigned arithmetic: integer is negative for x < 1 and left-shifting a negative int is undefined */
25 |    frac = in.f - 1.5f; /* in.f is now in [1,2), so frac lies in [-0.5,0.5) */
26 |    frac = -0.41445418f + frac*(0.95909232f
27 |           + frac*(-0.33951290f + frac*0.16541097f)); /* cubic polynomial in frac, evaluated in Horner form */
28 |    return 1+integer+frac;
29 | }
30 | 
31 | #define log_approx(x) (0.69315f*log2_approx(x))
32 | 
33 | 
34 | /** Copy n elements from src to dst. The 0* term provides compile-time type checking  */
35 | #ifndef OVERRIDE_RNN_COPY
36 | #define RNN_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
37 | #endif
38 | 
39 | /** Copy n elements from src to dst, allowing overlapping regions. The 0* term
40 |     provides compile-time type checking */
41 | #ifndef OVERRIDE_RNN_MOVE
42 | #define RNN_MOVE(dst, src, n) (memmove((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
43 | #endif
44 | 
45 | /** Set n elements of dst to zero */
46 | #ifndef OVERRIDE_RNN_CLEAR
47 | #define RNN_CLEAR(dst, n) (memset((dst), 0, (n)*sizeof(*(dst))))
48 | #endif
49 | 
50 | 
51 | 
52 | #endif
53 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime/inc/freq.h:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2017-2018 Mozilla */
 2 | /*
 3 |    Redistribution and use in source and binary forms, with or without
 4 |    modification, are permitted provided that the following conditions
 5 |    are met:
 6 | 
 7 |    - Redistributions of source code must retain the above copyright
 8 |    notice, this list of conditions and the following disclaimer.
 9 | 
10 |    - Redistributions in binary form must reproduce the above copyright
11 |    notice, this list of conditions and the following disclaimer in the
12 |    documentation and/or other materials provided with the distribution.
13 | 
14 |    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 |    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 |    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
18 |    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 |    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 |    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 |    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
22 |    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 |    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 |    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | */
26 | /* Modified by Patrick Lumban Tobing (Nagoya University) on Dec. 2020,
27 |    marked by PLT_Dec20 */
28 | #ifndef FREQ_H /* include guard: DSPState is an anonymous-struct typedef and must not be defined twice */
29 | #define FREQ_H
30 | #include "freq_conf.h"
31 | #include "kiss_fft.h"
32 | 
33 | //PLT_Dec20
34 | /*
35 |     Define these in freq_conf.h (MEL_DIM, used by MELSP_MAGSP_DIM below, is not defined in this header either)
36 |     SAMPLING_FREQUENCY 16000 //fs
37 |     FRAME_SHIFT 80 //int((fs/1000)*shiftms); shiftms = 5 ms
38 |     WINDOW_LENGTH 440 //int((fs/1000)*winms); winms = 27.5 ms
39 |     FFT_LENGTH 1024 //fs=8kHz-16kHz: 1024; 22.05kHz-24kHz: 2048; 44.1kHz-48kHz: 4096
40 |     HPASS_FILT_TAPS 1023 //order+1, has to be odd because high-pass filter passes nyq. freq.
41 | */
42 | 
43 | #define WINDOW_LENGTH_1 (WINDOW_LENGTH - 1)
44 | #define WINDOW_LENGTH_2 (WINDOW_LENGTH_1 - 1) //for indexing right side window buffer
45 | 
46 | #define HALF_WINDOW_LENGTH_1 (WINDOW_LENGTH_1 / 2) //does not include 1st [0] and (1+((N-1)/2)+((N-1)%2))th [1] if (N-1)%2 == 1
47 | 
48 | #define WIN_PAD (FFT_LENGTH - WINDOW_LENGTH) //window is centered on total FFT length
49 | 
50 | #define WIN_PAD_LEFT (WIN_PAD / 2)
51 | #define WIN_PAD_RIGHT (WIN_PAD_LEFT + (WIN_PAD % 2)) //right pad is one more than left pad if total pad is odd
52 | 
53 | #define HALF_FFT_LENGTH (FFT_LENGTH / 2)
54 | 
55 | #define LEFT_SAMPLES (HALF_FFT_LENGTH - WIN_PAD_LEFT) //samples at left-side window / reflected samples at the left edge
56 | #define RIGHT_SAMPLES (HALF_FFT_LENGTH - WIN_PAD_RIGHT) //samples at right-side window / reflected samples at the right edge
57 | 
58 | #define LEFT_SAMPLES_1 (LEFT_SAMPLES - 1) //for indexing first frame samples
59 | #define LEFT_SAMPLES_2 (LEFT_SAMPLES_1 - 1) //for indexing first frame reflected samples
60 | #define RIGHT_SAMPLES_1 (RIGHT_SAMPLES - 1) //for indexing first frame samples
61 | 
62 | #define WIN_LEFT_IDX (WIN_PAD_LEFT + 1) //0->439, index of centered 1st in total FFT-length, exclude first sample (+1) [0 coefficient]
63 | #define WIN_RIGHT_IDX (WIN_LEFT_IDX - 1 + WINDOW_LENGTH - 1) //0->439, index of centered 440th in total FFT-length
64 | 
65 | #define BUFFER_LENGTH (WINDOW_LENGTH_1 - FRAME_SHIFT) //store samples for proceeding frame
66 | 
67 | #define HPASS_FILT_TAPS_1 (HPASS_FILT_TAPS - 1)
68 | 
69 | #define MAGSP_DIM (HALF_FFT_LENGTH + 1) //number of bins from 0 to FFT_LENGTH/2 inclusive
70 | #define MELSP_MAGSP_DIM (MAGSP_DIM * MEL_DIM) //element count of the melfb array below
71 | 
72 | 
73 | //PLT_Dec20
74 | typedef struct {
75 |     kiss_fft_state *kfft;
76 |     float hpass_filt[HPASS_FILT_TAPS]; //one coefficient per filter tap
77 |     float half_window[HALF_WINDOW_LENGTH_1];
78 |     float samples_hpass[HPASS_FILT_TAPS];
79 |     float samples_win[WINDOW_LENGTH_1]; //exclude first sample because of coefficient 0
80 |     kiss_fft_cpx in_fft[FFT_LENGTH]; //initialized with zeros, fill in only centered window_length
81 |     kiss_fft_cpx out_fft[FFT_LENGTH];
82 |     float magsp[MAGSP_DIM];
83 |     float melfb[MELSP_MAGSP_DIM];
84 | } DSPState;
85 | //NOTE(review): the functions below are only declared here; presumably they are defined in src/freq.c -- check there for exact semantics
86 | int dspstate_get_size();
87 | 
88 | DSPState *dspstate_create();
89 | 
90 | void dspstate_destroy(DSPState *dsp);
91 | 
92 | void shift_apply_hpassfilt(DSPState *dsp, float *x);
93 | 
94 | void apply_window(DSPState *dsp);
95 | 
96 | void shift_apply_window(DSPState *dsp, const float *x);
97 | 
98 | void mel_spec_extract(DSPState *dsp, float *melsp);
99 | #endif /* FREQ_H */


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime/inc/mwdlp10net_cycvae.h:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2018 Mozilla */
 2 | /*
 3 |    Redistribution and use in source and binary forms, with or without
 4 |    modification, are permitted provided that the following conditions
 5 |    are met:
 6 | 
 7 |    - Redistributions of source code must retain the above copyright
 8 |    notice, this list of conditions and the following disclaimer.
 9 | 
10 |    - Redistributions in binary form must reproduce the above copyright
11 |    notice, this list of conditions and the following disclaimer in the
12 |    documentation and/or other materials provided with the distribution.
13 | 
14 |    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 |    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 |    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
18 |    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 |    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 |    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 |    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
22 |    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 |    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 |    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | */
26 | /* Modified by Patrick Lumban Tobing (Nagoya University) on Dec. 2020 - Aug. 2021,
27 |    marked by PLT_<MonthYear> */
28 | 
29 | //PLT_Dec20
30 | #ifndef _MWDLP10NET_CYCVAE_H_
31 | #define _MWDLP10NET_CYCVAE_H_
32 | 
33 | #ifndef MWDLP10NET_CYCVAE_EXPORT /* a definition supplied before this header (e.g. by the build) takes precedence */
34 | # if defined(WIN32)
35 | #  if defined(MWDLP10NET_CYCVAE_BUILD) && defined(DLL_EXPORT) /* building the library itself as a DLL */
36 | #   define MWDLP10NET_CYCVAE_EXPORT __declspec(dllexport)
37 | #  else
38 | #   define MWDLP10NET_CYCVAE_EXPORT /* expands to nothing */
39 | #  endif
40 | # elif defined(__GNUC__) && defined(MWDLP10NET_CYCVAE_BUILD) /* GCC-compatible compiler building the library */
41 | #  define MWDLP10NET_CYCVAE_EXPORT __attribute__ ((visibility ("default")))
42 | # else
43 | #  define MWDLP10NET_CYCVAE_EXPORT /* any other case: plain declarations */
44 | # endif
45 | #endif
46 | 
47 | 
48 | //PLT_Dec20
49 | typedef struct MWDLP10CycleVAEMelspExcitSpkNetState MWDLP10CycleVAEMelspExcitSpkNetState;
50 | /* Both state types are only forward-declared in this header, so callers handle them through pointers. */
51 | typedef struct MWDLP10NetState MWDLP10NetState;
52 | /* NOTE(review): each get_size/create/destroy trio presumably reports the state size, allocates a state and releases it -- confirm in src/mwdlp10net_cycvae.c. */
53 | MWDLP10NET_CYCVAE_EXPORT int mwdlp10cyclevaenet_get_size();
54 | 
55 | MWDLP10NET_CYCVAE_EXPORT MWDLP10CycleVAEMelspExcitSpkNetState *mwdlp10cyclevaenet_create();
56 | 
57 | MWDLP10NET_CYCVAE_EXPORT void mwdlp10cyclevaenet_destroy(MWDLP10CycleVAEMelspExcitSpkNetState *mwdlp10cyclevaenet);
58 | /* The same three entry points for the MWDLP10NetState type. */
59 | MWDLP10NET_CYCVAE_EXPORT int mwdlp10net_get_size();
60 | 
61 | MWDLP10NET_CYCVAE_EXPORT MWDLP10NetState *mwdlp10net_create();
62 | 
63 | MWDLP10NET_CYCVAE_EXPORT void mwdlp10net_destroy(MWDLP10NetState *mwdlp10net);
64 | 
65 | //PLT_Jul21
66 | MWDLP10NET_CYCVAE_EXPORT void cyclevae_melsp_excit_spk_convert_mwdlp10net_synthesize(
67 |     MWDLP10CycleVAEMelspExcitSpkNetState *st, float *features, float *spk_code_aux,
68 |         short *output, int *n_output, int flag_last_frame);
69 |         //short *output, int *n_output, int flag_last_frame, float *melsp_in_tmp, float *conv_tmp, float *dense_tmp, float *gru_tmp, float *lat_tmp, float *spk_in_tmp, float *spk_red_tmp, float *spk_conv_tmp, float *spk_dense_tmp, float *spk_gru_tmp, float *spk_out_tmp, float *spk_tmp, float *melsp_red_tmp, float *melsp_conv_tmp, float *melsp_dense_tmp, float *melsp_gru_tmp, float *melsp_pdf_tmp, float *melsp_smpl_tmp);
70 | /* The commented-out line above is an alternative parameter list with extra float* buffers; it is not compiled. */
71 | MWDLP10NET_CYCVAE_EXPORT void mwdlp10net_synthesize(MWDLP10NetState *st, const float *features,
72 |     short *output, int *n_output, int flag_last_frame);
73 |     //short *output, int *n_output, int flag_last_frame, float *mwdlp_conv_tmp, float *mwdlp_dense_tmp);
74 | /* NOTE(review): features is const here but non-const in the cyclevae_* variants; output/n_output presumably receive the samples and their count -- confirm. */
75 | //PLT_Jul21
76 | MWDLP10NET_CYCVAE_EXPORT void cyclevae_melsp_excit_spk_convert_mwdlp10net_synthesize_nodlpc(
77 |     MWDLP10CycleVAEMelspExcitSpkNetState *st, float *features, float *spk_code_aux,
78 |         short *output, int *n_output, int flag_last_frame);
79 |         //short *output, int *n_output, int flag_last_frame, float *melsp_in_tmp, float *conv_tmp, float *dense_tmp, float *gru_tmp, float *lat_tmp, float *spk_in_tmp, float *spk_red_tmp, float *spk_conv_tmp, float *spk_dense_tmp, float *spk_gru_tmp, float *spk_out_tmp, float *spk_tmp, float *melsp_red_tmp, float *melsp_conv_tmp, float *melsp_dense_tmp, float *melsp_gru_tmp, float *melsp_pdf_tmp, float *melsp_smpl_tmp);
80 | /* The *_nodlpc functions take the same parameter lists as their counterparts above. */
81 | MWDLP10NET_CYCVAE_EXPORT void mwdlp10net_synthesize_nodlpc(MWDLP10NetState *st, const float *features,
82 |     short *output, int *n_output, int flag_last_frame);
83 | 
84 | #endif
85 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime/inc/opus_types.h:
--------------------------------------------------------------------------------
  1 | /* (C) COPYRIGHT 1994-2002 Xiph.Org Foundation */
  2 | /* Modified by Jean-Marc Valin */
  3 | /*
  4 |    Redistribution and use in source and binary forms, with or without
  5 |    modification, are permitted provided that the following conditions
  6 |    are met:
  7 | 
  8 |    - Redistributions of source code must retain the above copyright
  9 |    notice, this list of conditions and the following disclaimer.
 10 | 
 11 |    - Redistributions in binary form must reproduce the above copyright
 12 |    notice, this list of conditions and the following disclaimer in the
 13 |    documentation and/or other materials provided with the distribution.
 14 | 
 15 |    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 16 |    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 17 |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 18 |    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 19 |    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 20 |    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 21 |    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 22 |    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 23 |    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 24 |    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 25 |    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 26 | */
 27 | /* opus_types.h based on ogg_types.h from libogg */
 28 | 
 29 | /**
 30 |    @file opus_types.h
 31 |    @brief Opus reference implementation types
 32 | */
 33 | #ifndef OPUS_TYPES_H
 34 | #define OPUS_TYPES_H
 35 | 
 36 | /* Use the real stdint.h if it's there (taken from Paul Hsieh's pstdint.h) */
 37 | #if (defined(__STDC__) && __STDC__ && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_)) || defined (HAVE_STDINT_H))
 38 | #include <stdint.h>
 39 |    /* C99 or later, or a stdint.h known to be present: alias the exact-width types */
 40 |    typedef int16_t opus_int16;
 41 |    typedef uint16_t opus_uint16;
 42 |    typedef int32_t opus_int32;
 43 |    typedef uint32_t opus_uint32;
 44 | #elif defined(_WIN32)
 45 |    /* Windows toolchains that did not take the <stdint.h> branch above */
 46 | #  if defined(__CYGWIN__)
 47 | #    include <_G_config.h>
 48 |      typedef _G_int32_t opus_int32;
 49 |      typedef _G_uint32_t opus_uint32;
 50 |      typedef _G_int16 opus_int16; /* NOTE(review): no _t suffix here, unlike the 32-bit names -- verify against <_G_config.h> */
 51 |      typedef _G_uint16 opus_uint16;
 52 | #  elif defined(__MINGW32__)
 53 |      typedef short opus_int16;
 54 |      typedef unsigned short opus_uint16;
 55 |      typedef int opus_int32;
 56 |      typedef unsigned int opus_uint32;
 57 | #  elif defined(__MWERKS__)
 58 |      typedef int opus_int32;
 59 |      typedef unsigned int opus_uint32;
 60 |      typedef short opus_int16;
 61 |      typedef unsigned short opus_uint16;
 62 | #  else
 63 |      /* MSVC/Borland */
 64 |      typedef __int32 opus_int32;
 65 |      typedef unsigned __int32 opus_uint32;
 66 |      typedef __int16 opus_int16;
 67 |      typedef unsigned __int16 opus_uint16;
 68 | #  endif
 69 | 
 70 | #elif defined(__MACOS__)
 71 | 
 72 | #  include <sys/types.h>
 73 |    typedef SInt16 opus_int16;
 74 |    typedef UInt16 opus_uint16;
 75 |    typedef SInt32 opus_int32;
 76 |    typedef UInt32 opus_uint32;
 77 | 
 78 | #elif (defined(__APPLE__) && defined(__MACH__)) /* MacOS X Framework build */
 79 | 
 80 | #  include <sys/types.h>
 81 |    typedef int16_t opus_int16;
 82 |    typedef u_int16_t opus_uint16;
 83 |    typedef int32_t opus_int32;
 84 |    typedef u_int32_t opus_uint32;
 85 | 
 86 | #elif defined(__BEOS__)
 87 | 
 88 |    /* Be */
 89 | #  include <inttypes.h>
 90 |    typedef int16 opus_int16;
 91 |    typedef u_int16 opus_uint16;
 92 |    typedef int32_t opus_int32;
 93 |    typedef u_int32_t opus_uint32;
 94 | 
 95 | #elif defined (__EMX__)
 96 | 
 97 |    /* OS/2 GCC */
 98 |    typedef short opus_int16;
 99 |    typedef unsigned short opus_uint16;
100 |    typedef int opus_int32;
101 |    typedef unsigned int opus_uint32;
102 | 
103 | #elif defined (DJGPP)
104 | 
105 |    /* DJGPP */
106 |    typedef short opus_int16;
107 |    typedef unsigned short opus_uint16;
108 |    typedef int opus_int32;
109 |    typedef unsigned int opus_uint32;
110 | 
111 | #elif defined(R5900)
112 | 
113 |    /* PS2 EE */
114 |    typedef int opus_int32;
115 |    typedef unsigned opus_uint32;
116 |    typedef short opus_int16;
117 |    typedef unsigned short opus_uint16;
118 | 
119 | #elif defined(__SYMBIAN32__)
120 | 
121 |    /* Symbian GCC */
122 |    typedef signed short opus_int16;
123 |    typedef unsigned short opus_uint16;
124 |    typedef signed int opus_int32;
125 |    typedef unsigned int opus_uint32;
126 | 
127 | #elif defined(CONFIG_TI_C54X) || defined (CONFIG_TI_C55X)
128 |    /* the only branch that maps the 32-bit types to long instead of int */
129 |    typedef short opus_int16;
130 |    typedef unsigned short opus_uint16;
131 |    typedef long opus_int32;
132 |    typedef unsigned long opus_uint32;
133 | 
134 | #elif defined(CONFIG_TI_C6X)
135 | 
136 |    typedef short opus_int16;
137 |    typedef unsigned short opus_uint16;
138 |    typedef int opus_int32;
139 |    typedef unsigned int opus_uint32;
140 | 
141 | #else
142 | 
143 |    /* Give up, take a reasonable guess */
144 |    typedef short opus_int16;
145 |    typedef unsigned short opus_uint16;
146 |    typedef int opus_int32;
147 |    typedef unsigned int opus_uint32;
148 | 
149 | #endif
150 | 
151 | #define opus_int         int                     /* used for counters etc; at least 16 bits */
152 | #define opus_int64       long long               /* at least 64 bits */
153 | #define opus_int8        signed char             /* at least 8 bits */
154 | /* These names are object-like macros, not typedefs: they expand textually wherever they appear. */
155 | #define opus_uint        unsigned int            /* used for counters etc; at least 16 bits */
156 | #define opus_uint64      unsigned long long      /* at least 64 bits */
157 | #define opus_uint8       unsigned char           /* at least 8 bits */
158 | 
159 | #endif  /* OPUS_TYPES_H */
160 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime/inc/tansig_table.h:
--------------------------------------------------------------------------------
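 1 | /* This file is auto-generated by gen_tables */
 2 | /* tansig_table[i] holds tanh(0.04*i) for i = 0..200, i.e. tanh sampled on [0, 8] in steps of 0.04. */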
 3 | static const float tansig_table[201] = {
 4 | 0.000000f, 0.039979f, 0.079830f, 0.119427f, 0.158649f,
 5 | 0.197375f, 0.235496f, 0.272905f, 0.309507f, 0.345214f,
 6 | 0.379949f, 0.413644f, 0.446244f, 0.477700f, 0.507977f,
 7 | 0.537050f, 0.564900f, 0.591519f, 0.616909f, 0.641077f,
 8 | 0.664037f, 0.685809f, 0.706419f, 0.725897f, 0.744277f,
 9 | 0.761594f, 0.777888f, 0.793199f, 0.807569f, 0.821040f,
10 | 0.833655f, 0.845456f, 0.856485f, 0.866784f, 0.876393f,
11 | 0.885352f, 0.893698f, 0.901468f, 0.908698f, 0.915420f,
12 | 0.921669f, 0.927473f, 0.932862f, 0.937863f, 0.942503f,
13 | 0.946806f, 0.950795f, 0.954492f, 0.957917f, 0.961090f,
14 | 0.964028f, 0.966747f, 0.969265f, 0.971594f, 0.973749f,
15 | 0.975743f, 0.977587f, 0.979293f, 0.980869f, 0.982327f,
16 | 0.983675f, 0.984921f, 0.986072f, 0.987136f, 0.988119f,
17 | 0.989027f, 0.989867f, 0.990642f, 0.991359f, 0.992020f,
18 | 0.992631f, 0.993196f, 0.993718f, 0.994199f, 0.994644f,
19 | 0.995055f, 0.995434f, 0.995784f, 0.996108f, 0.996407f,
20 | 0.996682f, 0.996937f, 0.997172f, 0.997389f, 0.997590f,
21 | 0.997775f, 0.997946f, 0.998104f, 0.998249f, 0.998384f,
22 | 0.998508f, 0.998623f, 0.998728f, 0.998826f, 0.998916f,
23 | 0.999000f, 0.999076f, 0.999147f, 0.999213f, 0.999273f,
24 | 0.999329f, 0.999381f, 0.999428f, 0.999472f, 0.999513f,
25 | 0.999550f, 0.999585f, 0.999617f, 0.999646f, 0.999673f,
26 | 0.999699f, 0.999722f, 0.999743f, 0.999763f, 0.999781f,
27 | 0.999798f, 0.999813f, 0.999828f, 0.999841f, 0.999853f,
28 | 0.999865f, 0.999875f, 0.999885f, 0.999893f, 0.999902f,
29 | 0.999909f, 0.999916f, 0.999923f, 0.999929f, 0.999934f,
30 | 0.999939f, 0.999944f, 0.999948f, 0.999952f, 0.999956f,
31 | 0.999959f, 0.999962f, 0.999965f, 0.999968f, 0.999970f,
32 | 0.999973f, 0.999975f, 0.999977f, 0.999978f, 0.999980f,
33 | 0.999982f, 0.999983f, 0.999984f, 0.999986f, 0.999987f,
34 | 0.999988f, 0.999989f, 0.999990f, 0.999990f, 0.999991f,
35 | 0.999992f, 0.999992f, 0.999993f, 0.999994f, 0.999994f,
36 | 0.999994f, 0.999995f, 0.999995f, 0.999996f, 0.999996f,
37 | 0.999996f, 0.999997f, 0.999997f, 0.999997f, 0.999997f,
38 | 0.999997f, 0.999998f, 0.999998f, 0.999998f, 0.999998f,
39 | 0.999998f, 0.999998f, 0.999999f, 0.999999f, 0.999999f,
40 | 0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f,
41 | 0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f,
42 | 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
43 | 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
44 | 1.000000f,
45 | };
46 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime/inc/vec.h:
--------------------------------------------------------------------------------
  1 | /* Copyright (c) 2018 Mozilla
  2 |                  2008-2011 Octasic Inc.
  3 |                  2012-2017 Jean-Marc Valin */
  4 | /*
  5 |    Redistribution and use in source and binary forms, with or without
  6 |    modification, are permitted provided that the following conditions
  7 |    are met:
  8 | 
  9 |    - Redistributions of source code must retain the above copyright
 10 |    notice, this list of conditions and the following disclaimer.
 11 | 
 12 |    - Redistributions in binary form must reproduce the above copyright
 13 |    notice, this list of conditions and the following disclaimer in the
 14 |    documentation and/or other materials provided with the distribution.
 15 | 
 16 |    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 17 |    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 18 |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 19 |    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
 20 |    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 21 |    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 22 |    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 23 |    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 24 |    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 25 |    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 26 |    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 27 | */
 28 | /* No AVX2/FMA support */
 29 | /* Modified by Patrick Lumban Tobing (Nagoya University) on Sept.-Dec. 2020,
 30 |    marked by PLT_<Sep/Dec>20 */
 31 | 
 32 | #ifndef VEC_NEON_H
 33 | #define VEC_NEON_H
 34 | 
 35 | //PLT_Dec20
 36 | #include "tansig_table.h"
 37 | 
 38 | static float celt_exp2(float x) /* fast approximation of 2^x */
 39 | {
 40 |    int integer; /* floor(x): becomes the power-of-two scale */
 41 |    float frac; /* x - floor(x), in [0, 1) */
 42 |    union { /* gives access to the raw bits of the float result */
 43 |       float f;
 44 |       opus_uint32 i;
 45 |    } res;
 46 |    integer = floor(x);
 47 |    if (integer < -50) /* treat very small results as exactly 0 */
 48 |       return 0;
 49 |    frac = x-integer;
 50 |    /* K0 = 1, K1 = log(2), K2 = 3-4*log(2), K3 = 3*log(2) - 2 (nominal values; the constants used below are close to, not equal to, these) */
 51 |    res.f = 0.99992522f + frac * (0.69583354f
 52 |            + frac * (0.22606716f + 0.078024523f*frac)); /* cubic approximation of 2^frac, Horner form */
 53 |    res.i = (res.i + (integer<<23)) & 0x7fffffff; /* add integer to the exponent field (scale by 2^integer); the mask clears the sign bit. NOTE(review): no upper clamp on integer is visible here */
 54 |    return res.f;
 55 | }
 56 | #define celt_exp(x) celt_exp2((x)*1.44269504f) /* e^x = 2^(x*log2(e)) */
 57 | 
 58 | static float tansig_approx(float x) /* table-based approximation of tanh(x) */
 59 | {
 60 |     int i; /* index of the nearest table entry */
 61 |     float y, dy;
 62 |     float sign=1;
 63 |     if (x<0) /* tanh is odd: work on |x| and restore the sign at the end */
 64 |     {
 65 |        x=-x;
 66 |        sign=-1;
 67 |     }
 68 |     i = (int)floor(.5f+25*x); /* round x/0.04 to the nearest integer */
 69 |     i = IMAX(0, IMIN(200, i)); /* keep the index inside tansig_table[0..200]; IMAX/IMIN are defined elsewhere, presumably integer max/min */
 70 |     x -= .04f*i; /* x is now the offset from the table point 0.04*i */
 71 |     y = tansig_table[i];
 72 |     dy = 1-y*y; /* derivative of tanh at the table point */
 73 |     y = y + x*dy*(1 - y*x); /* second-order Taylor step: y + x*dy - x*x*y*dy */
 74 |     return sign*y;
 75 | }
 76 | 
 77 | static OPUS_INLINE float sigmoid_approx(float x) /* logistic function built on tansig_approx */
 78 | {
 79 |    return .5f + .5f*tansig_approx(.5f*x); /* identity: sigmoid(x) = 1/2 + tanh(x/2)/2 */
 80 | }
 81 | 
 82 | static void softmax(float *y, const float *x, int N)
 83 | {
 84 |     /* Writes celt_exp(x[k]) for k in [0, N); nothing in this function divides by the sum of the outputs. */
 85 |     int k = 0;
 86 |     while (k < N) { y[k] = celt_exp(x[k]); k++; }
 87 | }
 88 | 
 89 | static void vec_exp(float *y, const float *x, int N)
 90 | {
 91 |     /* Element-wise e^x through the libm exp() (double precision), stored back as float. */
 92 |     int k;
 93 |     for (k = 0; k < N; ++k) y[k] = exp(x[k]);
 94 | }
 95 | 
 96 | static void vec_tanh(float *y, const float *x, int N)
 97 | {
 98 |     /* Table-based tanh of every element, in ascending index order. */
 99 |     int k = 0;
100 |     while (k < N) {
101 |         y[k] = tansig_approx(x[k]); ++k;
102 |     }
103 | }
104 | 
105 | //PLT_Feb21
106 | static void vec_tanh_exp(float *y, const float *x, int N)
107 | {
108 |     /* y[i] = tanh(x[i]) from exp(). exp() is fed -2|x| so it stays in [0, 1]; exp(2x) overflowed to inf for x > ~44 and gave inf/inf = NaN. */
109 |     float ex2, th;
110 |     for (int i=0;i<N;i++)
111 |     {
112 |         ex2 = exp(-2*fabs(x[i]));
113 |         th = (1-ex2)/(1+ex2); /* tanh(|x[i]|), in [0, 1] */
114 |         y[i] = (x[i] < 0) ? -th : th; /* tanh is odd; results for x < 0 are bit-identical to the old formula */
115 |     }
116 | }
117 | 
118 | //PLT_Sep20
119 | static void vec_tanhshrink(float *y, const float *x, int N)
120 | {
121 |     /* y[i] = x[i] - tanh(x[i]). exp() is fed -2|x| so it stays in [0, 1]; exp(2x) overflowed to inf for x > ~44 and gave inf/inf = NaN. */
122 |     float ex2, th;
123 |     for (int i=0;i<N;i++)
124 |     {
125 |         ex2 = exp(-2*fabs(x[i]));
126 |         th = (1-ex2)/(1+ex2); /* tanh(|x[i]|), in [0, 1] */
127 |         y[i] = (x[i] < 0) ? x[i]+th : x[i]-th; /* tanh is odd; results for x < 0 are bit-identical to the old formula */
128 |     }
129 | }
130 | 
131 | static void vec_sigmoid(float *y, const float *x, int N)
132 | {
133 |     /* Logistic function of every element via the table-based sigmoid_approx(). */
134 |     int k;
135 |     for (k = 0; k < N; ++k) {
136 |         y[k] = sigmoid_approx(x[k]);
137 |     }
138 | }
139 | 
140 | //PLT_Feb21
141 | static void vec_sigmoid_exp(float *y, const float *x, int N)
142 | {
143 |     /* y[i] = 1/(1+e^-x[i]) from exp(). exp() is fed -|x| so it stays in [0, 1]; exp(x) overflowed to inf for x > ~88 and gave inf/inf = NaN. */
144 |     float ex;
145 |     for (int i=0;i<N;i++)
146 |     {
147 |         ex = exp(-fabs(x[i]));
148 |         /* x < 0: e^x/(e^x+1), bit-identical to the old formula; x >= 0: the same value written as 1/(1+e^-x) */
149 |         y[i] = (x[i] < 0) ? (ex)/(ex+1) : 1/(ex+1);
150 |     }
151 | }
152 | 
153 | static void sgemv_accum16(float *out, const float *weights, int rows, int cols, int col_stride, const float *x) /* out += weights * x (dense) */
154 | {
155 |    int i, j;
156 |    for (i=0;i<rows;i+=16) /* 16 output rows per pass; presumably rows is a multiple of 16, since all 16 slots are always written */
157 |    {
158 |       for (j=0;j<cols;j++)
159 |       {
160 |          const float * restrict w;
161 |          float * restrict y;
162 |          float xj;
163 |          w = &weights[j*col_stride + i]; /* column j starts at weights[j*col_stride]; rows of a column are contiguous */
164 |          xj = x[j];
165 |          y = &out[i];
166 |          y[0] += w[0]*xj; /* accumulates into out: the caller supplies the initial values */
167 |          y[1] += w[1]*xj;
168 |          y[2] += w[2]*xj;
169 |          y[3] += w[3]*xj;
170 |          y[4] += w[4]*xj;
171 |          y[5] += w[5]*xj;
172 |          y[6] += w[6]*xj;
173 |          y[7] += w[7]*xj;
174 |          y[8] += w[8]*xj;
175 |          y[9] += w[9]*xj;
176 |          y[10] += w[10]*xj;
177 |          y[11] += w[11]*xj;
178 |          y[12] += w[12]*xj;
179 |          y[13] += w[13]*xj;
180 |          y[14] += w[14]*xj;
181 |          y[15] += w[15]*xj;
182 |       }
183 |    }
184 | }
185 | 
186 | static void sparse_sgemv_accum16(float *out, const float *w, int rows, const int *idx, const float *x) /* out += W * x, W stored as 16-row column blocks */
187 | {
188 |    int i, j;
189 |    for (i=0;i<rows;i+=16) /* 16 output rows per pass; presumably rows is a multiple of 16, since all 16 slots are always written */
190 |    {
191 |       int cols;
192 |       cols = *idx++; /* idx stream: first the number of stored columns for this 16-row band ... */
193 |       for (j=0;j<cols;j++)
194 |       {
195 |          float * restrict y;
196 |          float xj;
197 |          xj = x[*idx++]; /* ... then one input index per stored column */
198 |          y = &out[i];
199 |          y[0] += w[0]*xj; /* accumulates into out: the caller supplies the initial values */
200 |          y[1] += w[1]*xj;
201 |          y[2] += w[2]*xj;
202 |          y[3] += w[3]*xj;
203 |          y[4] += w[4]*xj;
204 |          y[5] += w[5]*xj;
205 |          y[6] += w[6]*xj;
206 |          y[7] += w[7]*xj;
207 |          y[8] += w[8]*xj;
208 |          y[9] += w[9]*xj;
209 |          y[10] += w[10]*xj;
210 |          y[11] += w[11]*xj;
211 |          y[12] += w[12]*xj;
212 |          y[13] += w[13]*xj;
213 |          y[14] += w[14]*xj;
214 |          y[15] += w[15]*xj;
215 |          w += 16; /* weights are packed 16 per stored column, consumed in stream order */
216 |       }
217 |    }
218 | }
219 | 
220 | #endif /* VEC_NEON_H */
221 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime/inc/wave.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |    Copyright 2021 Patrick Lumban Tobing (Nagoya University)
 3 |    Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
 4 | 
 5 |    WAV file read/write is based on http://truelogic.org/wordpress/2015/09/04/parsing-a-wav-file-in-c
 6 | */
 7 | 
 8 | 
 9 | // WAVE file header format
10 | struct HEADER {
11 |     unsigned char riff[4];                      // RIFF string
12 |     unsigned int overall_size;               // overall size of file in bytes
13 |     unsigned char wave[4];                      // WAVE string
14 |     unsigned char fmt_chunk_marker[4];          // fmt string with trailing null char
15 |     unsigned int length_of_fmt;                 // length of the format data
16 |     unsigned short format_type;                   // format type. 1-PCM, 3- IEEE float, 6 - 8bit A law, 7 - 8bit mu law
17 |     unsigned short channels;                      // no.of channels
18 |     unsigned int sample_rate;                   // sampling rate (blocks per second)
19 |     unsigned int byterate;                      // SampleRate * NumChannels * BitsPerSample/8
20 |     unsigned short block_align;                   // NumChannels * BitsPerSample/8
21 |     unsigned short bits_per_sample;               // bits per sample, 8- 8bits, 16- 16 bits etc
22 |     unsigned char data_chunk_header [4];        // DATA string or FLLR string
23 |     unsigned int data_size;                     // NumSamples * NumChannels * BitsPerSample/8 - size of the next chunk that will be read
24 | };
25 | 
26 | /*
27 |     Positions   Sample Value    Description
28 |     1 – 4   “RIFF”  Marks the file as a riff file. Characters are each 1 byte long.
29 |     5 – 8   File size (integer)     Size of the overall file – 8 bytes, in bytes (32-bit integer). Typically, you’d fill this in after creation.
30 |     9 -12   “WAVE”  File Type Header. For our purposes, it always equals “WAVE”.
31 |     13-16   “fmt “  Format chunk marker. Includes trailing null
32 |     17-20   16  Length of format data as listed above
33 |     21-22   1   Type of format (1 is PCM) – 2 byte integer
34 |     23-24   2   Number of Channels – 2 byte integer
35 |     25-28   44100   Sample Rate – 32 byte integer. Common values are 44100 (CD), 48000 (DAT). Sample Rate = Number of Samples per second, or Hertz.
36 |     29-32   176400  (Sample Rate * BitsPerSample * Channels) / 8.
37 |     33-34   4   (BitsPerSample * Channels) / 8.1 – 8 bit mono2 – 8 bit stereo/16 bit mono4 – 16 bit stereo
38 |     35-36   16  Bits per sample
39 |     37-40   “data”  “data” chunk header. Marks the beginning of the data section.
40 |     41-44   File size (data)    Size of the data section.
41 |     Sample values are given above for a 16-bit stereo source.
42 | 
43 |     It is important to note that the WAV format uses little-endian [LSB in smallest address] format to store bytes,
44 |     so you need to convert the bytes to big-endian [MSB in smallest address] in code for the values to make sense.
45 | */
46 | 
47 | short read_write_wav(FILE *fin, FILE *fout, short *num_reflected_right_edge_samples, long *num_samples, long *size_of_each_sample);
48 | long read_feat_write_wav(FILE* fin, FILE* fout, int bin_flag);
49 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime_init/Makefile:
--------------------------------------------------------------------------------
 1 | # Builds the static library lib/libmwdlp10cycvae.a from src/*.c and links
 2 | # bin/test_cycvae_mwdlp against it. Override CC/CFLAGS/LFLAGS on the command line.
 3 | .DELETE_ON_ERROR:
 4 | 
 5 | LDIR := lib
 6 | LIBNAME := mwdlp10cycvae
 7 | OUT := $(LDIR)/lib$(LIBNAME).a
 8 | 
 9 | CC = gcc
10 | CFLAGS = -mavx2 -mfma -g -O3 -Wall -W -Wextra -fpic
11 | LFLAGS = -lm
12 | # -MMD/-MP make the compiler write a .d file listing the headers each source includes.
13 | DEPFLAGS := -MMD -MP
14 | 
15 | ODIR := obj
16 | SDIR := src
17 | IDIR := inc
18 | BDIR := bin
19 | 
20 | INC := -I$(IDIR)
21 | LIB := -L$(LDIR) -l$(LIBNAME)
22 | TARGET := test_cycvae_mwdlp
23 | 
24 | _OBJS := nnet.o mwdlp10net_cycvae.o kiss_fft.o freq.o wave.o nnet_data.o nnet_cv_data.o
25 | OBJS := $(patsubst %,$(ODIR)/%,$(_OBJS))
26 | DEPS := $(OBJS:.o=.d) $(ODIR)/$(TARGET).d
27 | 
28 | .PHONY: all clean
29 | 
30 | all: $(BDIR)/$(TARGET)
31 | 
32 | # Test program: a real file target, so it is relinked only when its source, a header or the library changes.
33 | $(BDIR)/$(TARGET): $(SDIR)/$(TARGET).c $(OUT) | $(BDIR) $(ODIR)
34 | 	$(CC) $(CFLAGS) $(DEPFLAGS) -MF $(ODIR)/$(TARGET).d -MT $@ $< $(INC) $(LIB) $(LFLAGS) -o $@
35 | 
36 | $(OUT): $(OBJS) | $(LDIR)
37 | 	$(AR) rvs $@ $^
38 | 
39 | # Compile only: linker flags do not belong on a -c command line.
40 | $(ODIR)/%.o: $(SDIR)/%.c | $(ODIR)
41 | 	$(CC) $(CFLAGS) $(DEPFLAGS) $(INC) -c -o $@ $<
42 | 
43 | # Output directories are order-only prerequisites: created on demand, never a rebuild trigger.
44 | $(LDIR) $(ODIR) $(BDIR):
45 | 	mkdir -p $@
46 | 
47 | -include $(DEPS)
48 | 
49 | clean:
50 | 	rm -f $(ODIR)/*.o $(ODIR)/*.d $(OUT) $(BDIR)/$(TARGET)
51 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime_init/demo_anasyn.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Feeds every .wav under $in_dir to ./bin/test_mwdlp and writes a same-named file into $out_dir.
 3 | 
 4 | #in_dir=wav_8kHz
 5 | #in_dir=wav_16kHz
 6 | in_dir=wav_24kHz
 7 | #out_dir=wav_anasyn_8kHz
 8 | #out_dir=wav_anasyn_16kHz
 9 | out_dir=wav_anasyn_24kHz
10 | 
11 | mkdir -p "$out_dir"
12 | 
13 | # Loop over the glob itself instead of a parsed `ls` listing: paths containing spaces stay intact,
14 | # no tmp_anasyn.list is left behind on interruption, and the program's stdin is not the list file.
15 | for line in "${in_dir}"/*.wav; do
16 |     [ -e "$line" ] || continue  # the pattern is left unexpanded when nothing matches
17 |     name=$(basename "$line")
18 |     echo "$line" "$name"
19 |     ./bin/test_mwdlp "$line" "${out_dir}/$name"
20 |     #./bin/test_mwdlp.exe "$line" "${out_dir}/$name"
21 | done
22 | 
23 | 
24 | #split=(${line// / })
25 | #for spk in ${spks[@]};do
26 | #    spk_idx=$(( ${spk_idx}+1  ))
27 | #count=`expr $count + 1`
28 | #done
29 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime_init/demo_interp.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Runs the converter once per (x, y) pair of the grid below, passing the pair after -c.
 3 | x_coords=(0.00 -0.05 -0.10 -0.15 -0.20 -0.25 -0.30 -0.40)
 4 | y_coords=(0.00 -0.05 -0.10 -0.15 -0.20 -0.25 -0.30 -0.35 -0.40 -0.45)
 5 | # NOTE(review): x_coords has no -0.35 entry although y_coords does -- confirm this is intended.
 6 | file_idx=001; src_spk=p326
 7 | in_dir=wav; out_dir=wav_cv_interp
 8 | stem="${file_idx}_${src_spk}"  # shared by the input name and every output name
 9 | 
10 | mkdir -p "$out_dir"
11 | 
12 | # x varies in the outer loop, y in the inner one.
13 | for x in "${x_coords[@]}"; do
14 |     for y in "${y_coords[@]}"; do
15 |         echo "$file_idx $src_spk to $x $y"
16 |         ./bin/test_cycvae_mwdlp -c "$x" "$y" "${in_dir}/${stem}.wav" "${out_dir}/${stem}-interpolate_${x}_${y}.wav"
17 |         #./bin/test_cycvae_mwdlp.exe -c "$x" "$y" "${in_dir}/${stem}.wav" "${out_dir}/${stem}-interpolate_${x}_${y}.wav"
18 |     done
19 | done
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime_init/demo_melsp.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Mel-spectrogram round-trip demo for one utterance, calling ./bin/test_mwdlp as:
 3 | #   -o <melsp.bin> <melsp.txt> <in.wav> <out.wav>
 4 | #   -b <melsp.bin> <out.wav>
 5 | #   -t <melsp.txt> <out.wav>
 6 | # NOTE(review): presumably -o dumps the features while synthesizing and -b/-t
 7 | # synthesize back from the binary/text dump -- confirm against the tool's usage.
 8 | 
 9 | in_dir=wav
10 | out_dir=wav_melsp
11 | 
12 | mkdir -p "$out_dir"
13 | 
14 | # Disabled batch variant that walks over every wave file in $in_dir:
15 | #ls ${in_dir}/*.wav > tmp.list
16 | #
17 | #while read line;do
18 | #    name=`basename $line .wav`
19 | #    echo $line $name
20 | #    ./bin/test_mwdlp -o melsp.bin melsp.txt $line ${out_dir}/${name}_anasyn.wav
21 | #    ./bin/test_mwdlp -b melsp.bin ${out_dir}/${name}_binsyn.wav
22 | #    ./bin/test_mwdlp -t melsp.txt ${out_dir}/${name}_txtsyn.wav
23 | #done < tmp.list
24 | #
25 | #rm -f tmp.list
26 | 
27 | line="${in_dir}/001_p326.wav"
28 | name=$(basename "$line" .wav)
29 | 
30 | # Every file produced for this utterance shares the prefix <out_dir>/<name>.
31 | stem="${out_dir}/${name}"
32 | melsp_bin="${stem}_melsp.bin"
33 | melsp_txt="${stem}_melsp.txt"
34 | 
35 | # If the binary was built as test_mwdlp.exe, use that name here instead.
36 | mwdlp=./bin/test_mwdlp
37 | #mwdlp=./bin/test_mwdlp.exe
38 | 
39 | "$mwdlp" -o "$melsp_bin" "$melsp_txt" "$line" "${stem}_anasyn.wav"
40 | "$mwdlp" -b "$melsp_bin" "${stem}_binsyn.wav"
41 | "$mwdlp" -t "$melsp_txt" "${stem}_txtsyn.wav"


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime_init/demo_point.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Conversion demo: run ./bin/test_cycvae_mwdlp once per entry of $spks on the
 3 | # same source utterance, passing "-i <n>" where n is the entry's 1-based
 4 | # position in the list.
 5 | 
 6 | spks=(SEF1 SEF2 SEM1 SEM2 TFM1 TGM1 TMM1 TEF1 TEM1 TEF2 TEM2 TFF1 TGF1 TMF1)
 7 | 
 8 | file_idx=E10061
 9 | src_spk=SEF2
10 | 
11 | in_dir=wav
12 | out_dir=wav_cv_point
13 | 
14 | mkdir -p "$out_dir"
15 | 
16 | src_wav="${in_dir}/${file_idx}_${src_spk}.wav"
17 | 
18 | # ${!spks[@]} yields the 0-based array indices; the binary is given index+1.
19 | for pos in "${!spks[@]}"; do
20 |     spk=${spks[$pos]}
21 |     spk_idx=$(( pos + 1 ))
22 |     echo "$file_idx $src_spk to $spk $spk_idx"
23 |     ./bin/test_cycvae_mwdlp -i "$spk_idx" "$src_wav" "${out_dir}/${spk_idx}_${file_idx}_${src_spk}-${spk}.wav"
24 |     #./bin/test_cycvae_mwdlp.exe -i "$spk_idx" "$src_wav" "${out_dir}/${spk_idx}_${file_idx}_${src_spk}-${spk}.wav"
25 | done


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime_init/inc/_kiss_fft_guts.h:
--------------------------------------------------------------------------------
  1 | /*Copyright (c) 2003-2004, Mark Borgerding
  2 | 
  3 |   All rights reserved.
  4 | 
  5 |   Redistribution and use in source and binary forms, with or without
  6 |    modification, are permitted provided that the following conditions are met:
  7 | 
  8 |     * Redistributions of source code must retain the above copyright notice,
  9 |        this list of conditions and the following disclaimer.
 10 |     * Redistributions in binary form must reproduce the above copyright notice,
 11 |        this list of conditions and the following disclaimer in the
 12 |        documentation and/or other materials provided with the distribution.
 13 | 
 14 |   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 15 |   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 16 |   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 17 |   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 18 |   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 19 |   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 20 |   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 21 |   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 22 |   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 23 |   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 24 |   POSSIBILITY OF SUCH DAMAGE.*/
 25 | 
 26 | #ifndef KISS_FFT_GUTS_H
 27 | #define KISS_FFT_GUTS_H
 28 | 
 29 | #define MIN(a,b) ((a)<(b) ? (a):(b))
 30 | #define MAX(a,b) ((a)>(b) ? (a):(b))
 31 | 
 32 | /* kiss_fft.h
 33 |    defines kiss_fft_scalar as either short or a float type
 34 |    and defines
 35 |    typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */
 36 | #include "kiss_fft.h"
 37 | 
 38 | /*
 39 |   Explanation of macros dealing with complex math:
 40 | 
 41 |    C_MUL(m,a,b)         : m = a*b
 42 |    C_FIXDIV( c , div )  : if a fixed point impl., c /= div. noop otherwise
 43 |    C_SUB( res, a,b)     : res = a - b
 44 |    C_SUBFROM( res , a)  : res -= a
 45 |    C_ADDTO( res , a)    : res += a
 46 |  * */
 47 | #ifdef FIXED_POINT
 48 | #include "arch.h"
 49 | 
 50 | 
 51 | #define SAMP_MAX 2147483647
 52 | #define TWID_MAX 32767
 53 | #define TRIG_UPSCALE 1
 54 | 
 55 | #define SAMP_MIN -SAMP_MAX
 56 | 
 57 | 
 58 | #   define S_MUL(a,b) MULT16_32_Q15(b, a) /* operands are handed to MULT16_32_Q15 in swapped order (b, a); that macro is not defined in this header (presumably arch.h -- confirm) */
 59 | /* C_MUL: m = a*b (complex product). m.r is stored before m.i is computed, so m must not alias a or b. */
 60 | #   define C_MUL(m,a,b) \
 61 |       do{ (m).r = SUB32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
 62 |           (m).i = ADD32_ovflw(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0)
 63 | /* C_MULC: m = a*conj(b). Same aliasing restriction as C_MUL. */
 64 | #   define C_MULC(m,a,b) \
 65 |       do{ (m).r = ADD32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
 66 |           (m).i = SUB32_ovflw(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)
 67 | /* C_MULBYSCALAR: scale both parts of c by s, in place. */
 68 | #   define C_MULBYSCALAR( c, s ) \
 69 |       do{ (c).r =  S_MUL( (c).r , s ) ;\
 70 |           (c).i =  S_MUL( (c).i , s ) ; }while(0)
 71 | /* DIVSCALAR: x = S_MUL(x, (TWID_MAX-(k>>1))/k + 1). NOTE(review): looks like a multiply by a rounded TWID_MAX/k reciprocal, i.e. an approximate x /= k -- confirm. Expands to a bare assignment (no do/while wrapper). */
 72 | #   define DIVSCALAR(x,k) \
 73 |         (x) = S_MUL(  x, (TWID_MAX-((k)>>1))/(k)+1 )
 74 | /* C_FIXDIV: apply DIVSCALAR with the same divisor to both parts of c. */
 75 | #   define C_FIXDIV(c,div) \
 76 |         do {    DIVSCALAR( (c).r , div);  \
 77 |                 DIVSCALAR( (c).i  , div); }while (0)
 78 | /* Component-wise complex add/subtract built on ADD32_ovflw/SUB32_ovflw, which are defined outside this header. */
 79 | #define  C_ADD( res, a,b)\
 80 |     do {(res).r=ADD32_ovflw((a).r,(b).r);  (res).i=ADD32_ovflw((a).i,(b).i); \
 81 |     }while(0)
 82 | #define  C_SUB( res, a,b)\
 83 |     do {(res).r=SUB32_ovflw((a).r,(b).r);  (res).i=SUB32_ovflw((a).i,(b).i); \
 84 |     }while(0)
 85 | #define C_ADDTO( res , a)\
 86 |     do {(res).r = ADD32_ovflw((res).r, (a).r);  (res).i = ADD32_ovflw((res).i,(a).i);\
 87 |     }while(0)
 88 | 
 89 | #define C_SUBFROM( res , a) /* res -= a on both parts, as the macro summary in this header states ("C_SUBFROM( res , a)  : res -= a"); the real part previously used ADD32_ovflw and therefore added a.r */ \
 90 |     do {(res).r = SUB32_ovflw((res).r,(a).r);  (res).i = SUB32_ovflw((res).i,(a).i); \
 91 |     }while(0)
 92 | 
 93 | #if defined(OPUS_ARM_INLINE_ASM)
 94 | #include "arm/kiss_fft_armv4.h"
 95 | #endif
 96 | 
 97 | #if defined(OPUS_ARM_INLINE_EDSP)
 98 | #include "arm/kiss_fft_armv5e.h"
 99 | #endif
100 | #if defined(MIPSr1_ASM)
101 | #include "mips/kiss_fft_mipsr1.h"
102 | #endif
103 | 
104 | #else  /* not FIXED_POINT*/
105 | 
106 | #   define S_MUL(a,b) ( (a)*(b) ) /* plain product in the non-FIXED_POINT build */
107 | #define C_MUL(m,a,b) \
108 |     do{ (m).r = (a).r*(b).r - (a).i*(b).i;\
109 |         (m).i = (a).r*(b).i + (a).i*(b).r; }while(0) /* m = a*b; m.r is written first, so m must not alias a or b */
110 | #define C_MULC(m,a,b) \
111 |     do{ (m).r = (a).r*(b).r + (a).i*(b).i;\
112 |         (m).i = (a).i*(b).r - (a).r*(b).i; }while(0) /* m = a*conj(b); same aliasing restriction */
113 | /* C_MUL4 is a plain alias of C_MUL in this build. */
114 | #define C_MUL4(m,a,b) C_MUL(m,a,b)
115 | 
116 | #   define C_FIXDIV(c,div) /* NOOP */
117 | #   define C_MULBYSCALAR( c, s ) \
118 |     do{ (c).r *= (s);\
119 |         (c).i *= (s); }while(0) /* scale both parts of c by s, in place */
120 | #endif
121 | 
122 | #ifndef CHECK_OVERFLOW_OP
123 | #  define CHECK_OVERFLOW_OP(a,op,b) /* noop */
124 | #endif /* default: the overflow checks below expand to nothing unless CHECK_OVERFLOW_OP was defined earlier */
125 | /* Fallback definitions, used only when C_ADD is not defined yet (the FIXED_POINT branch above defines C_ADD, C_SUB, C_ADDTO and C_SUBFROM itself). */
126 | #ifndef C_ADD
127 | #define  C_ADD( res, a,b)\
128 |     do { \
129 |             CHECK_OVERFLOW_OP((a).r,+,(b).r)\
130 |             CHECK_OVERFLOW_OP((a).i,+,(b).i)\
131 |             (res).r=(a).r+(b).r;  (res).i=(a).i+(b).i; \
132 |     }while(0) /* res = a + b */
133 | #define  C_SUB( res, a,b)\
134 |     do { \
135 |             CHECK_OVERFLOW_OP((a).r,-,(b).r)\
136 |             CHECK_OVERFLOW_OP((a).i,-,(b).i)\
137 |             (res).r=(a).r-(b).r;  (res).i=(a).i-(b).i; \
138 |     }while(0) /* res = a - b */
139 | #define C_ADDTO( res , a)\
140 |     do { \
141 |             CHECK_OVERFLOW_OP((res).r,+,(a).r)\
142 |             CHECK_OVERFLOW_OP((res).i,+,(a).i)\
143 |             (res).r += (a).r;  (res).i += (a).i;\
144 |     }while(0) /* res += a */
145 | /* C_SUBFROM: res -= a on both the real and the imaginary part. */
146 | #define C_SUBFROM( res , a)\
147 |     do {\
148 |             CHECK_OVERFLOW_OP((res).r,-,(a).r)\
149 |             CHECK_OVERFLOW_OP((res).i,-,(a).i)\
150 |             (res).r -= (a).r;  (res).i -= (a).i; \
151 |     }while(0)
152 | #endif /* C_ADD defined */
153 | 
154 | #ifdef FIXED_POINT /* twiddles are cos/sin scaled by TWID_MAX and rounded via floor(.5 + ...) */
155 | /*#  define KISS_FFT_COS(phase)  TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * cos (phase))))
156 | #  define KISS_FFT_SIN(phase)  TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * sin (phase))))*/
157 | #  define KISS_FFT_COS(phase)  floor(.5+TWID_MAX*cos (phase))
158 | #  define KISS_FFT_SIN(phase)  floor(.5+TWID_MAX*sin (phase))
159 | #  define HALF_OF(x) ((x)>>1)
160 | #elif defined(USE_SIMD) /* the scalar cos/sin result is passed to _mm_set1_ps */
161 | #  define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) )
162 | #  define KISS_FFT_SIN(phase) _mm_set1_ps( sin(phase) )
163 | #  define HALF_OF(x) ((x)*_mm_set1_ps(.5f))
164 | #else /* default build: cos/sin cast to kiss_fft_scalar */
165 | #  define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase)
166 | #  define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase)
167 | #  define HALF_OF(x) ((x)*.5f)
168 | #endif
169 | /* kf_cexp: store KISS_FFT_COS(phase) in x->r and KISS_FFT_SIN(phase) in x->i; x is a pointer to a complex value. */
170 | #define  kf_cexp(x,phase) \
171 |         do{ \
172 |                 (x)->r = KISS_FFT_COS(phase);\
173 |                 (x)->i = KISS_FFT_SIN(phase);\
174 |         }while(0)
175 | /* kf_cexp2: NOTE(review) -- TRIG_UPSCALE is only defined in the FIXED_POINT branch of this header and celt_cos_norm is not declared here, so this macro can only be expanded where both are available. */
176 | #define  kf_cexp2(x,phase) \
177 |    do{ \
178 |       (x)->r = TRIG_UPSCALE*celt_cos_norm((phase));\
179 |       (x)->i = TRIG_UPSCALE*celt_cos_norm((phase)-32768);\
180 | }while(0)
181 | 
182 | #endif /* KISS_FFT_GUTS_H */
183 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime_init/inc/common.h:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #ifndef COMMON_H
 4 | #define COMMON_H
 5 | 
 6 | #include <stdlib.h>
 7 | #include <string.h>
 8 | #include <math.h>
 9 | 
10 | #define RNN_INLINE inline
11 | #define OPUS_INLINE inline
12 | 
13 | #define LOG256 5.5451774445f /* natural logarithm of 256 */
14 | static RNN_INLINE float log2_approx(float x) /* fast approximation of log2(x); assumes 32-bit IEEE-754 float, 32-bit int and a positive normal x */
15 | {
16 |    int integer;
17 |    float frac;
18 |    union {
19 |       float f;
20 |       int i;
21 |    } in;
22 |    in.f = x;
23 |    integer = (in.i>>23)-127; /* unbiased binary exponent (sign bit is 0 for x > 0) */
24 |    in.i -= integer * (1<<23); /* reset the exponent so in.f lies in [1,2); multiply instead of '<<' because left-shifting a negative int is undefined */
25 |    frac = in.f - 1.5f; /* centre the mantissa: frac in [-0.5, 0.5) */
26 |    frac = -0.41445418f + frac*(0.95909232f
27 |           + frac*(-0.33951290f + frac*0.16541097f)); /* cubic polynomial in frac */
28 |    return 1+integer+frac;
29 | }
30 | 
31 | #define log_approx(x) (0.69315f*log2_approx(x))
32 | 
33 | 
34 | /** Copy n elements from src to dst. The 0* term provides compile-time type checking  */
35 | #ifndef OVERRIDE_RNN_COPY
36 | #define RNN_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
37 | #endif
38 | 
39 | /** Copy n elements from src to dst, allowing overlapping regions. The 0* term
40 |     provides compile-time type checking */
41 | #ifndef OVERRIDE_RNN_MOVE
42 | #define RNN_MOVE(dst, src, n) (memmove((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
43 | #endif
44 | 
45 | /** Set n elements of dst to zero */
46 | #ifndef OVERRIDE_RNN_CLEAR
47 | #define RNN_CLEAR(dst, n) (memset((dst), 0, (n)*sizeof(*(dst))))
48 | #endif
49 | 
50 | 
51 | 
52 | #endif
53 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime_init/inc/freq.h:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2017-2018 Mozilla */
 2 | /*
 3 |    Redistribution and use in source and binary forms, with or without
 4 |    modification, are permitted provided that the following conditions
 5 |    are met:
 6 | 
 7 |    - Redistributions of source code must retain the above copyright
 8 |    notice, this list of conditions and the following disclaimer.
 9 | 
10 |    - Redistributions in binary form must reproduce the above copyright
11 |    notice, this list of conditions and the following disclaimer in the
12 |    documentation and/or other materials provided with the distribution.
13 | 
14 |    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 |    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 |    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
18 |    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 |    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 |    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 |    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
22 |    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 |    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 |    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | */
26 | /* Modified by Patrick Lumban Tobing (Nagoya University) on Dec. 2020,
27 |    marked by PLT_Dec20 */
28 | #ifndef FREQ_H
29 | #define FREQ_H
30 | /* Include guard: DSPState below is a typedef of an unnamed struct, so this header must not be expanded twice in one translation unit. */
31 | #include "freq_conf.h"
32 | #include "kiss_fft.h"
33 | 
34 | //PLT_Dec20
35 | /*
36 |     Define these in freq_conf.h (MEL_DIM, used by MELSP_MAGSP_DIM below, must also be defined before this point):
37 |     SAMPLING_FREQUENCY 16000 //fs
38 |     FRAME_SHIFT 80 //int((fs/1000)*shiftms); shiftms = 5 ms
39 |     WINDOW_LENGTH 440 //int((fs/1000)*winms); winms = 27.5 ms
40 |     FFT_LENGTH 1024 //fs=8kHz-16kHz: 1024; 22.05kHz-24kHz: 2048; 44.1kHz-48kHz: 4096
41 |     HPASS_FILT_TAPS 1023 //order+1, has to be odd because high-pass filter passes nyq. freq.
42 | */
43 | 
44 | #define WINDOW_LENGTH_1 (WINDOW_LENGTH - 1)
45 | #define WINDOW_LENGTH_2 (WINDOW_LENGTH_1 - 1) //for indexing right side window buffer
46 | 
47 | #define HALF_WINDOW_LENGTH_1 (WINDOW_LENGTH_1 / 2) //does not include 1st [0] and (1+((N-1)/2)+((N-1)%2))th [1] if (N-1)%2 == 1
48 | 
49 | #define WIN_PAD (FFT_LENGTH - WINDOW_LENGTH) //total padding; window is centered on total FFT length
50 | #define WIN_PAD_LEFT (WIN_PAD / 2)
51 | #define WIN_PAD_RIGHT (WIN_PAD_LEFT + (WIN_PAD % 2)) //right pad is one larger than left pad if total pad is odd
52 | 
53 | #define HALF_FFT_LENGTH (FFT_LENGTH / 2)
54 | 
55 | #define LEFT_SAMPLES (HALF_FFT_LENGTH - WIN_PAD_LEFT) //samples at left-side window / reflected samples at the left edge
56 | #define RIGHT_SAMPLES (HALF_FFT_LENGTH - WIN_PAD_RIGHT) //samples at right-side window / reflected samples at the right edge
57 | 
58 | #define LEFT_SAMPLES_1 (LEFT_SAMPLES - 1) //for indexing first frame samples
59 | #define LEFT_SAMPLES_2 (LEFT_SAMPLES_1 - 1) //for indexing first frame reflected samples
60 | #define RIGHT_SAMPLES_1 (RIGHT_SAMPLES - 1) //for indexing first frame samples
61 | 
62 | #define WIN_LEFT_IDX (WIN_PAD_LEFT + 1) //0->439 (for WINDOW_LENGTH 440), index of centered 1st in total FFT-length, exclude first sample (+1) [0 coefficient]
63 | #define WIN_RIGHT_IDX (WIN_LEFT_IDX - 1 + WINDOW_LENGTH - 1) //0->439 (for WINDOW_LENGTH 440), index of centered last (440th) window sample in total FFT-length
64 | 
65 | #define BUFFER_LENGTH (WINDOW_LENGTH_1 - FRAME_SHIFT) //store samples for proceeding frame
66 | 
67 | #define HPASS_FILT_TAPS_1 (HPASS_FILT_TAPS - 1)
68 | 
69 | #define MAGSP_DIM (HALF_FFT_LENGTH + 1)
70 | #define MELSP_MAGSP_DIM (MAGSP_DIM * MEL_DIM)
71 | 
72 | /* State shared by the functions declared below; all buffers are fixed-size arrays dimensioned by the macros above. */
73 | //PLT_Dec20
74 | typedef struct {
75 |     kiss_fft_state *kfft;
76 |     float hpass_filt[HPASS_FILT_TAPS];
77 |     float half_window[HALF_WINDOW_LENGTH_1];
78 |     float samples_hpass[HPASS_FILT_TAPS];
79 |     float samples_win[WINDOW_LENGTH_1]; //exclude first sample because of coefficient 0
80 |     kiss_fft_cpx in_fft[FFT_LENGTH]; //initialized with zeros, fill in only centered window_length
81 |     kiss_fft_cpx out_fft[FFT_LENGTH];
82 |     float magsp[MAGSP_DIM];
83 |     float melfb[MELSP_MAGSP_DIM];
84 | } DSPState;
85 | /* NOTE(review): the implementations are not visible from this header; the notes below are inferred from the names -- confirm in the implementation file. */
86 | int dspstate_get_size();
87 | /* presumably allocates/initializes a DSPState that is later released with dspstate_destroy() -- confirm */
88 | DSPState *dspstate_create();
89 | 
90 | void dspstate_destroy(DSPState *dsp);
91 | /* x is non-const here, so it may be modified in place -- TODO confirm */
92 | void shift_apply_hpassfilt(DSPState *dsp, float *x);
93 | 
94 | void apply_window(DSPState *dsp);
95 | 
96 | void shift_apply_window(DSPState *dsp, const float *x);
97 | /* melsp is an output buffer; presumably MEL_DIM floats -- TODO confirm */
98 | void mel_spec_extract(DSPState *dsp, float *melsp);
99 | #endif /* FREQ_H */
99 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime_init/inc/mwdlp10net_cycvae.h:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2018 Mozilla */
 2 | /*
 3 |    Redistribution and use in source and binary forms, with or without
 4 |    modification, are permitted provided that the following conditions
 5 |    are met:
 6 | 
 7 |    - Redistributions of source code must retain the above copyright
 8 |    notice, this list of conditions and the following disclaimer.
 9 | 
10 |    - Redistributions in binary form must reproduce the above copyright
11 |    notice, this list of conditions and the following disclaimer in the
12 |    documentation and/or other materials provided with the distribution.
13 | 
14 |    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 |    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 |    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
18 |    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 |    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 |    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 |    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
22 |    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 |    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 |    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | */
26 | /* Modified by Patrick Lumban Tobing (Nagoya University) on Dec. 2020 - Aug. 2021,
27 |    marked by PLT_<MonthYear> */
28 | 
29 | //PLT_Dec20
30 | #ifndef _MWDLP10NET_CYCVAE_H_
31 | #define _MWDLP10NET_CYCVAE_H_
32 | 
33 | #ifndef MWDLP10NET_CYCVAE_EXPORT
34 | # if defined(WIN32)
35 | #  if defined(MWDLP10NET_CYCVAE_BUILD) && defined(DLL_EXPORT)
36 | #   define MWDLP10NET_CYCVAE_EXPORT __declspec(dllexport)
37 | #  else
38 | #   define MWDLP10NET_CYCVAE_EXPORT
39 | #  endif
40 | # elif defined(__GNUC__) && defined(MWDLP10NET_CYCVAE_BUILD)
41 | #  define MWDLP10NET_CYCVAE_EXPORT __attribute__ ((visibility ("default")))
42 | # else
43 | #  define MWDLP10NET_CYCVAE_EXPORT
44 | # endif
45 | #endif
46 | 
47 | 
48 | //PLT_Dec20
49 | typedef struct MWDLP10CycleVAEMelspExcitSpkNetState MWDLP10CycleVAEMelspExcitSpkNetState;
50 | /* Both state types are only forward-declared in this header; the functions below take and return them by pointer. */
51 | typedef struct MWDLP10NetState MWDLP10NetState;
52 | /* NOTE(review): presumably get_size/create/destroy report the state size, allocate a state and free it -- confirm in the implementation. */
53 | MWDLP10NET_CYCVAE_EXPORT int mwdlp10cyclevaenet_get_size();
54 | 
55 | MWDLP10NET_CYCVAE_EXPORT MWDLP10CycleVAEMelspExcitSpkNetState *mwdlp10cyclevaenet_create();
56 | 
57 | MWDLP10NET_CYCVAE_EXPORT void mwdlp10cyclevaenet_destroy(MWDLP10CycleVAEMelspExcitSpkNetState *mwdlp10cyclevaenet);
58 | /* Same trio of functions for the MWDLP10NetState type. */
59 | MWDLP10NET_CYCVAE_EXPORT int mwdlp10net_get_size();
60 | 
61 | MWDLP10NET_CYCVAE_EXPORT MWDLP10NetState *mwdlp10net_create();
62 | 
63 | MWDLP10NET_CYCVAE_EXPORT void mwdlp10net_destroy(MWDLP10NetState *mwdlp10net);
64 | 
65 | //PLT_Jul21
66 | MWDLP10NET_CYCVAE_EXPORT void cyclevae_melsp_excit_spk_convert_mwdlp10net_synthesize(
67 |     MWDLP10CycleVAEMelspExcitSpkNetState *st, float *features, float *spk_code_aux,
68 |         short *output, int *n_output, int flag_last_frame);
69 |         //short *output, int *n_output, int flag_last_frame, float *melsp_in_tmp, float *conv_tmp, float *dense_tmp, float *gru_tmp, float *lat_tmp, float *spk_in_tmp, float *spk_red_tmp, float *spk_conv_tmp, float *spk_dense_tmp, float *spk_gru_tmp, float *spk_out_tmp, float *spk_tmp, float *melsp_red_tmp, float *melsp_conv_tmp, float *melsp_dense_tmp, float *melsp_gru_tmp, float *melsp_pdf_tmp, float *melsp_smpl_tmp);
70 | 
71 | MWDLP10NET_CYCVAE_EXPORT void mwdlp10net_synthesize(MWDLP10NetState *st, const float *features,
72 |     short *output, int *n_output, int flag_last_frame);
73 |     //short *output, int *n_output, int flag_last_frame, float *mwdlp_conv_tmp, float *mwdlp_dense_tmp);
74 | 
75 | //PLT_Jul21
76 | MWDLP10NET_CYCVAE_EXPORT void cyclevae_melsp_excit_spk_convert_mwdlp10net_synthesize_nodlpc(
77 |     MWDLP10CycleVAEMelspExcitSpkNetState *st, float *features, float *spk_code_aux,
78 |         short *output, int *n_output, int flag_last_frame);
79 |         //short *output, int *n_output, int flag_last_frame, float *melsp_in_tmp, float *conv_tmp, float *dense_tmp, float *gru_tmp, float *lat_tmp, float *spk_in_tmp, float *spk_red_tmp, float *spk_conv_tmp, float *spk_dense_tmp, float *spk_gru_tmp, float *spk_out_tmp, float *spk_tmp, float *melsp_red_tmp, float *melsp_conv_tmp, float *melsp_dense_tmp, float *melsp_gru_tmp, float *melsp_pdf_tmp, float *melsp_smpl_tmp);
80 | 
81 | MWDLP10NET_CYCVAE_EXPORT void mwdlp10net_synthesize_nodlpc(MWDLP10NetState *st, const float *features,
82 |     short *output, int *n_output, int flag_last_frame);
83 | 
84 | #endif
85 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime_init/inc/opus_types.h:
--------------------------------------------------------------------------------
  1 | /* (C) COPYRIGHT 1994-2002 Xiph.Org Foundation */
  2 | /* Modified by Jean-Marc Valin */
  3 | /*
  4 |    Redistribution and use in source and binary forms, with or without
  5 |    modification, are permitted provided that the following conditions
  6 |    are met:
  7 | 
  8 |    - Redistributions of source code must retain the above copyright
  9 |    notice, this list of conditions and the following disclaimer.
 10 | 
 11 |    - Redistributions in binary form must reproduce the above copyright
 12 |    notice, this list of conditions and the following disclaimer in the
 13 |    documentation and/or other materials provided with the distribution.
 14 | 
 15 |    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 16 |    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 17 |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 18 |    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 19 |    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 20 |    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 21 |    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 22 |    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 23 |    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 24 |    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 25 |    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 26 | */
 27 | /* opus_types.h based on ogg_types.h from libogg */
 28 | 
 29 | /**
 30 |    @file opus_types.h
 31 |    @brief Opus reference implementation types
 32 | */
 33 | #ifndef OPUS_TYPES_H
 34 | #define OPUS_TYPES_H
 35 | 
 36 | /* Use the real stdint.h if it's there (taken from Paul Hsieh's pstdint.h) */
 37 | #if (defined(__STDC__) && __STDC__ && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_)) || defined (HAVE_STDINT_H))
 38 | #include <stdint.h>
 39 | 
 40 |    typedef int16_t opus_int16;
 41 |    typedef uint16_t opus_uint16;
 42 |    typedef int32_t opus_int32;
 43 |    typedef uint32_t opus_uint32;
 44 | #elif defined(_WIN32)
 45 | 
 46 | #  if defined(__CYGWIN__)
 47 | #    include <_G_config.h>
 48 |      typedef _G_int32_t opus_int32;
 49 |      typedef _G_uint32_t opus_uint32;
 50 |      typedef _G_int16 opus_int16;
 51 |      typedef _G_uint16 opus_uint16;
 52 | #  elif defined(__MINGW32__)
 53 |      typedef short opus_int16;
 54 |      typedef unsigned short opus_uint16;
 55 |      typedef int opus_int32;
 56 |      typedef unsigned int opus_uint32;
 57 | #  elif defined(__MWERKS__)
 58 |      typedef int opus_int32;
 59 |      typedef unsigned int opus_uint32;
 60 |      typedef short opus_int16;
 61 |      typedef unsigned short opus_uint16;
 62 | #  else
 63 |      /* MSVC/Borland */
 64 |      typedef __int32 opus_int32;
 65 |      typedef unsigned __int32 opus_uint32;
 66 |      typedef __int16 opus_int16;
 67 |      typedef unsigned __int16 opus_uint16;
 68 | #  endif
 69 | 
 70 | #elif defined(__MACOS__)
 71 | 
 72 | #  include <sys/types.h>
 73 |    typedef SInt16 opus_int16;
 74 |    typedef UInt16 opus_uint16;
 75 |    typedef SInt32 opus_int32;
 76 |    typedef UInt32 opus_uint32;
 77 | 
 78 | #elif (defined(__APPLE__) && defined(__MACH__)) /* MacOS X Framework build */
 79 | 
 80 | #  include <sys/types.h>
 81 |    typedef int16_t opus_int16;
 82 |    typedef u_int16_t opus_uint16;
 83 |    typedef int32_t opus_int32;
 84 |    typedef u_int32_t opus_uint32;
 85 | 
 86 | #elif defined(__BEOS__)
 87 | 
 88 |    /* Be */
 89 | #  include <inttypes.h>
 90 |    typedef int16 opus_int16;
 91 |    typedef u_int16 opus_uint16;
 92 |    typedef int32_t opus_int32;
 93 |    typedef u_int32_t opus_uint32;
 94 | 
 95 | #elif defined (__EMX__)
 96 | 
 97 |    /* OS/2 GCC */
 98 |    typedef short opus_int16;
 99 |    typedef unsigned short opus_uint16;
100 |    typedef int opus_int32;
101 |    typedef unsigned int opus_uint32;
102 | 
103 | #elif defined (DJGPP)
104 | 
105 |    /* DJGPP */
106 |    typedef short opus_int16;
107 |    typedef unsigned short opus_uint16;
108 |    typedef int opus_int32;
109 |    typedef unsigned int opus_uint32;
110 | 
111 | #elif defined(R5900)
112 | 
113 |    /* PS2 EE */
114 |    typedef int opus_int32;
115 |    typedef unsigned opus_uint32;
116 |    typedef short opus_int16;
117 |    typedef unsigned short opus_uint16;
118 | 
119 | #elif defined(__SYMBIAN32__)
120 | 
121 |    /* Symbian GCC */
122 |    typedef signed short opus_int16;
123 |    typedef unsigned short opus_uint16;
124 |    typedef signed int opus_int32;
125 |    typedef unsigned int opus_uint32;
126 | 
127 | #elif defined(CONFIG_TI_C54X) || defined (CONFIG_TI_C55X)
128 | 
129 |    typedef short opus_int16;
130 |    typedef unsigned short opus_uint16;
131 |    typedef long opus_int32;
132 |    typedef unsigned long opus_uint32;
133 | 
134 | #elif defined(CONFIG_TI_C6X)
135 | 
136 |    typedef short opus_int16;
137 |    typedef unsigned short opus_uint16;
138 |    typedef int opus_int32;
139 |    typedef unsigned int opus_uint32;
140 | 
141 | #else
142 | 
143 |    /* Give up, take a reasonable guess */
144 |    typedef short opus_int16;
145 |    typedef unsigned short opus_uint16;
146 |    typedef int opus_int32;
147 |    typedef unsigned int opus_uint32;
148 | 
149 | #endif
150 | 
151 | #define opus_int         int                     /* used for counters etc; at least 16 bits */
152 | #define opus_int64       long long
153 | #define opus_int8        signed char
154 | 
155 | #define opus_uint        unsigned int            /* used for counters etc; at least 16 bits */
156 | #define opus_uint64      unsigned long long
157 | #define opus_uint8       unsigned char
158 | 
159 | #endif  /* OPUS_TYPES_H */
160 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime_init/inc/tansig_table.h:
--------------------------------------------------------------------------------
 1 | /* This file is auto-generated by gen_tables */
 2 | 
 3 | static const float tansig_table[201] = {
 4 | 0.000000f, 0.039979f, 0.079830f, 0.119427f, 0.158649f,
 5 | 0.197375f, 0.235496f, 0.272905f, 0.309507f, 0.345214f,
 6 | 0.379949f, 0.413644f, 0.446244f, 0.477700f, 0.507977f,
 7 | 0.537050f, 0.564900f, 0.591519f, 0.616909f, 0.641077f,
 8 | 0.664037f, 0.685809f, 0.706419f, 0.725897f, 0.744277f,
 9 | 0.761594f, 0.777888f, 0.793199f, 0.807569f, 0.821040f,
10 | 0.833655f, 0.845456f, 0.856485f, 0.866784f, 0.876393f,
11 | 0.885352f, 0.893698f, 0.901468f, 0.908698f, 0.915420f,
12 | 0.921669f, 0.927473f, 0.932862f, 0.937863f, 0.942503f,
13 | 0.946806f, 0.950795f, 0.954492f, 0.957917f, 0.961090f,
14 | 0.964028f, 0.966747f, 0.969265f, 0.971594f, 0.973749f,
15 | 0.975743f, 0.977587f, 0.979293f, 0.980869f, 0.982327f,
16 | 0.983675f, 0.984921f, 0.986072f, 0.987136f, 0.988119f,
17 | 0.989027f, 0.989867f, 0.990642f, 0.991359f, 0.992020f,
18 | 0.992631f, 0.993196f, 0.993718f, 0.994199f, 0.994644f,
19 | 0.995055f, 0.995434f, 0.995784f, 0.996108f, 0.996407f,
20 | 0.996682f, 0.996937f, 0.997172f, 0.997389f, 0.997590f,
21 | 0.997775f, 0.997946f, 0.998104f, 0.998249f, 0.998384f,
22 | 0.998508f, 0.998623f, 0.998728f, 0.998826f, 0.998916f,
23 | 0.999000f, 0.999076f, 0.999147f, 0.999213f, 0.999273f,
24 | 0.999329f, 0.999381f, 0.999428f, 0.999472f, 0.999513f,
25 | 0.999550f, 0.999585f, 0.999617f, 0.999646f, 0.999673f,
26 | 0.999699f, 0.999722f, 0.999743f, 0.999763f, 0.999781f,
27 | 0.999798f, 0.999813f, 0.999828f, 0.999841f, 0.999853f,
28 | 0.999865f, 0.999875f, 0.999885f, 0.999893f, 0.999902f,
29 | 0.999909f, 0.999916f, 0.999923f, 0.999929f, 0.999934f,
30 | 0.999939f, 0.999944f, 0.999948f, 0.999952f, 0.999956f,
31 | 0.999959f, 0.999962f, 0.999965f, 0.999968f, 0.999970f,
32 | 0.999973f, 0.999975f, 0.999977f, 0.999978f, 0.999980f,
33 | 0.999982f, 0.999983f, 0.999984f, 0.999986f, 0.999987f,
34 | 0.999988f, 0.999989f, 0.999990f, 0.999990f, 0.999991f,
35 | 0.999992f, 0.999992f, 0.999993f, 0.999994f, 0.999994f,
36 | 0.999994f, 0.999995f, 0.999995f, 0.999996f, 0.999996f,
37 | 0.999996f, 0.999997f, 0.999997f, 0.999997f, 0.999997f,
38 | 0.999997f, 0.999998f, 0.999998f, 0.999998f, 0.999998f,
39 | 0.999998f, 0.999998f, 0.999999f, 0.999999f, 0.999999f,
40 | 0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f,
41 | 0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f,
42 | 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
43 | 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
44 | 1.000000f,
45 | };
46 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime_init/inc/vec.h:
--------------------------------------------------------------------------------
  1 | /* Copyright (c) 2018 Mozilla
  2 |                  2008-2011 Octasic Inc.
  3 |                  2012-2017 Jean-Marc Valin */
  4 | /*
  5 |    Redistribution and use in source and binary forms, with or without
  6 |    modification, are permitted provided that the following conditions
  7 |    are met:
  8 | 
  9 |    - Redistributions of source code must retain the above copyright
 10 |    notice, this list of conditions and the following disclaimer.
 11 | 
 12 |    - Redistributions in binary form must reproduce the above copyright
 13 |    notice, this list of conditions and the following disclaimer in the
 14 |    documentation and/or other materials provided with the distribution.
 15 | 
 16 |    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 17 |    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 18 |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 19 |    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
 20 |    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 21 |    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 22 |    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 23 |    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 24 |    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 25 |    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 26 |    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 27 | */
 28 | /* No AVX2/FMA support */
 29 | /* Modified by Patrick Lumban Tobing (Nagoya University) on Sept. 2020 - Feb. 2021,
 30 |    marked by PLT_<MonthYear> (e.g. PLT_Sep20, PLT_Dec20, PLT_Feb21) */
 31 | 
 32 | #ifndef VEC_H /* guard named after this file; matches the closing "#endif" comment */
 33 | #define VEC_H
 34 | 
 35 | //PLT_Dec20
 36 | #include "tansig_table.h"
 37 | 
 38 | static float celt_exp2(float x) /* fast approximation of 2^x */
 39 | {
 40 |    int integer;
 41 |    float frac;
 42 |    union {
 43 |       float f;
 44 |       opus_uint32 i;
 45 |    } res; /* lets the float's bit pattern be edited as an integer */
 46 |    integer = floor(x);
 47 |    if (integer < -50) /* very small result: return 0 instead of building it */
 48 |       return 0;
 49 |    frac = x-integer; /* fractional part, fed to the cubic polynomial below */
 50 |    /* K0 = 1, K1 = log(2), K2 = 3-4*log(2), K3 = 3*log(2) - 2 */
 51 |    res.f = 0.99992522f + frac * (0.69583354f
 52 |            + frac * (0.22606716f + 0.078024523f*frac));
 53 |    res.i = (res.i + ((opus_uint32)integer<<23)) & 0x7fffffff; /* unsigned shift: << on a negative int is undefined */
 54 |    return res.f;
 55 | }
 56 | #define celt_exp(x) celt_exp2((x)*1.44269504f) /* e^x = 2^(x*log2(e)) */
 57 | 
 58 | /* Table-based tanh approximation: nearest table entry plus a local correction. */
 59 | static float tansig_approx(float x)
 60 | {
 61 |     /* tanh is odd, so work on |x| and restore the sign at the end */
 62 |     const float sgn = (x<0) ? -1 : 1;
 63 |     float ax = (x<0) ? -x : x;
 64 |     float base, slope, res;
 65 |     int idx;
 66 |     /* nearest grid point (step 0.04), clamped to the table bounds */
 67 |     idx = (int)floor(.5f+25*ax);
 68 |     idx = IMAX(0, IMIN(200, idx));
 69 |     ax -= .04f*idx; /* signed offset from that grid point */
 70 |     base = tansig_table[idx];
 71 |     slope = 1-base*base; /* derivative of tanh at the grid point */
 72 |     res = base + ax*slope*(1 - base*ax);
 73 |     return sgn*res;
 74 | }
 76 | 
 77 | static OPUS_INLINE float sigmoid_approx(float x) /* sigmoid(x) = (1 + tanh(x/2))/2 */
 78 | {
 79 |    return .5f*tansig_approx(.5f*x) + .5f;
 80 | }
 81 | 
 82 | static void softmax(float *y, const float *x, int N)
 83 | {
 84 |     /* writes celt_exp(x[k]) to y[k]; no sum-normalization happens in here */
 85 |     int k;
 86 |     for (k=0;k<N;k++) y[k] = celt_exp(x[k]);
 87 | }
 88 | 
 89 | static void vec_exp(float *y, const float *x, int N)
 90 | {
 91 |     /* element-wise exp() from <math.h>: y[k] = e^x[k] for k in [0, N) */
 92 |     int k = 0;
 93 |     while (k<N) { y[k] = exp(x[k]); k++; }
 94 | }
 95 | 
 96 | static void vec_tanh(float *y, const float *x, int N)
 97 | {
 98 |     /* element-wise: y[k] = tansig_approx(x[k]) for k in [0, N) */
 99 |     int k;
100 |     for (k=0;k<N;k++)
101 |         y[k] = tansig_approx(x[k]);
102 | }
104 | 
105 | //PLT_Feb21
106 | static void vec_tanh_exp(float *y, const float *x, int N)
107 | {
108 |     /* Element-wise tanh without the lookup table. tanh() from <math.h> is used
109 |        instead of (e^2x - 1)/(e^2x + 1): that quotient becomes inf/inf = NaN once
110 |        e^2x overflows float (x above ~44), whereas tanh() saturates to +/-1. */
111 |     for (int i=0;i<N;i++)
112 |     {
113 |         y[i] = tanh(x[i]);
114 |     }
115 | }
117 | 
118 | //PLT_Sep20
119 | static void vec_tanhshrink(float *y, const float *x, int N)
120 | {
121 |     /* Element-wise tanhshrink: y = x - tanh(x). tanh() from <math.h> is used
122 |        instead of (e^2x - 1)/(e^2x + 1), which turns into inf/inf = NaN once e^2x
123 |        overflows float (x above ~44); tanh() saturates to +/-1 instead. */
124 |     for (int i=0;i<N;i++)
125 |     {
126 |         y[i] = x[i]-tanh(x[i]);
127 |     }
128 | }
130 | 
131 | static void vec_sigmoid(float *y, const float *x, int N)
132 | {
133 |     /* element-wise: y[k] = sigmoid_approx(x[k]) for k in [0, N) */
134 |     int k;
135 |     for (k=0;k<N;k++)
136 |         y[k] = sigmoid_approx(x[k]);
137 | }
139 | 
140 | //PLT_Feb21
141 | static void vec_sigmoid_exp(float *y, const float *x, int N)
142 | {
143 |     /* Element-wise logistic sigmoid without the lookup table, evaluated in
144 |        double as 1/(1 + e^-x). The float form e^x/(e^x + 1) yields inf/inf = NaN
145 |        once e^x overflows float (x above ~88); this form saturates to 0 or 1. */
146 |     for (int i=0;i<N;i++)
147 |     {
148 |         y[i] = 1.0/(1.0+exp(-x[i]));
149 |     }
150 | }
152 | 
153 | /* Dense matrix-vector multiply-accumulate, processed in blocks of 16 rows:
154 |    out[i+k] += weights[j*col_stride + i+k] * x[j] for every column j, k in [0,16). */
155 | static void sgemv_accum16(float *out, const float *weights, int rows, int cols, int col_stride, const float *x)
156 | {
157 |    int row, col, k;
158 |    for (row=0;row<rows;row+=16)
159 |    {
160 |       for (col=0;col<cols;col++)
161 |       {
162 |          const float * restrict wcol = &weights[col*col_stride + row];
163 |          float * restrict acc = &out[row];
164 |          const float xc = x[col];
165 |          /* 16 independent accumulators, one per row of the current block */
166 |          for (k=0;k<16;k++)
167 |             acc[k] += wcol[k]*xc;
168 |       }
169 |    }
170 | }
185 | 
186 | /* Block-sparse matrix-vector multiply-accumulate over blocks of 16 rows.
187 |    For each row block, idx supplies a count followed by that many column
188 |    indices; w holds 16 consecutive weights for each listed column. */
189 | static void sparse_sgemv_accum16(float *out, const float *w, int rows, const int *idx, const float *x)
190 | {
191 |    int row, n, k;
192 |    for (row=0;row<rows;row+=16)
193 |    {
194 |       int nb_cols = *idx++;
195 |       for (n=0;n<nb_cols;n++)
196 |       {
197 |          float * restrict acc = &out[row];
198 |          const float xc = x[*idx++];
199 |          for (k=0;k<16;k++)
200 |             acc[k] += w[k]*xc;
201 |          w += 16; /* advance to the weights of the next listed column */
202 |       }
203 |    }
204 | }
219 | 
220 | #endif /* VEC_H */
221 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime_init/inc/wave.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |    Copyright 2021 Patrick Lumban Tobing (Nagoya University)
 3 |    Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
 4 | 
 5 |    WAV file read/write is based on http://truelogic.org/wordpress/2015/09/04/parsing-a-wav-file-in-c
 6 | */
 7 | 
 8 | 
 9 | // WAVE file header format
10 | struct HEADER {
11 |     unsigned char riff[4];                      // RIFF string
12 |     unsigned int overall_size;               // overall size of file in bytes
13 |     unsigned char wave[4];                      // WAVE string
14 |     unsigned char fmt_chunk_marker[4];          // fmt string with trailing null char
15 |     unsigned int length_of_fmt;                 // length of the format data
16 |     unsigned short format_type;                   // format type. 1-PCM, 3- IEEE float, 6 - 8bit A law, 7 - 8bit mu law
17 |     unsigned short channels;                      // no.of channels
18 |     unsigned int sample_rate;                   // sampling rate (blocks per second)
19 |     unsigned int byterate;                      // SampleRate * NumChannels * BitsPerSample/8
20 |     unsigned short block_align;                   // NumChannels * BitsPerSample/8
21 |     unsigned short bits_per_sample;               // bits per sample, 8- 8bits, 16- 16 bits etc
22 |     unsigned char data_chunk_header [4];        // DATA string or FLLR string
23 |     unsigned int data_size;                     // NumSamples * NumChannels * BitsPerSample/8 - size of the next chunk that will be read
24 | };
25 | 
26 | /*
27 |     Positions   Sample Value    Description
28 |     1 – 4   “RIFF”  Marks the file as a riff file. Characters are each 1 byte long.
29 |     5 – 8   File size (integer)     Size of the overall file – 8 bytes, in bytes (32-bit integer). Typically, you’d fill this in after creation.
30 |     9 -12   “WAVE”  File Type Header. For our purposes, it always equals “WAVE”.
31 |     13-16   “fmt “  Format chunk marker. Includes trailing null
32 |     17-20   16  Length of format data as listed above
33 |     21-22   1   Type of format (1 is PCM) – 2 byte integer
34 |     23-24   2   Number of Channels – 2 byte integer
35 |     25-28   44100   Sample Rate – 32 byte integer. Common values are 44100 (CD), 48000 (DAT). Sample Rate = Number of Samples per second, or Hertz.
36 |     29-32   176400  (Sample Rate * BitsPerSample * Channels) / 8.
37 |     33-34   4   (BitsPerSample * Channels) / 8.1 – 8 bit mono2 – 8 bit stereo/16 bit mono4 – 16 bit stereo
38 |     35-36   16  Bits per sample
39 |     37-40   “data”  “data” chunk header. Marks the beginning of the data section.
40 |     41-44   File size (data)    Size of the data section.
41 |     Sample values are given above for a 16-bit stereo source.
42 | 
43 |     It is important to note that the WAV format uses little-endian [LSB in smallest address] format to store bytes,
44 |     so you need to convert the bytes to big-endian [MSB in smallest address] in code for the values to make sense.
45 | */
46 | 
47 | short read_write_wav(FILE *fin, FILE *fout, short *num_reflected_right_edge_samples, long *num_samples, long *size_of_each_sample);
48 | long read_feat_write_wav(FILE* fin, FILE* fout, int bin_flag);
49 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime_mid/Makefile:
--------------------------------------------------------------------------------
 1 | LDIR = lib
 2 | LIBNAME = mwdlp10cycvae
 3 | OUT = ${LDIR}/lib${LIBNAME}.a
 4 | 
 5 | CC = gcc
 6 | CFLAGS = -mavx2 -mfma -g -O3 -Wall -W -Wextra -fpic
 7 | LFLAGS = -lm
 8 | 
 9 | ODIR = obj
10 | SDIR = src
11 | IDIR = inc
12 | BDIR = bin
13 | $(shell mkdir -p $(LDIR) $(ODIR) $(BDIR))
14 | 
15 | INC = -I${IDIR}
16 | LIB = -L${LDIR} -l${LIBNAME}
17 | TARGET = test_cycvae_mwdlp
18 | # Headers are prerequisites of every compile so that header edits trigger rebuilds.
19 | HDRS := $(wildcard $(IDIR)/*.h)
20 | 
21 | _OBJS = nnet.o mwdlp10net_cycvae.o kiss_fft.o freq.o wave.o nnet_data.o nnet_cv_data.o
22 | OBJS = $(patsubst %,$(ODIR)/%,$(_OBJS))
23 | 
24 | .PHONY: all clean
25 | all: ${BDIR}/${TARGET}
26 | 
27 | # Real file target: the test program is relinked only when one of its inputs changed.
28 | ${BDIR}/${TARGET}: ${SDIR}/${TARGET}.c ${OUT} $(HDRS)
29 | 	$(CC) $(CFLAGS) $< $(INC) ${LIB} ${LFLAGS} -o $@
30 | 
31 | $(OUT): $(OBJS)
32 | 	ar rvs $@ $^
33 | 
34 | # Compile step only; the link flag -lm belongs to the link rule above.
35 | $(ODIR)/%.o: $(SDIR)/%.c $(HDRS)
36 | 	$(CC) $(CFLAGS) $(INC) -c -o $@ $<
37 | 
38 | clean:
39 | 	rm -f $(ODIR)/*.o $(OUT) ${BDIR}/${TARGET}


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime_mid/demo_anasyn.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Runs ./bin/test_mwdlp on every *.wav in $in_dir, writing each result under
 4 | # the same file name in $out_dir.
 5 | # NOTE(review): the Makefile in this directory builds bin/test_cycvae_mwdlp, not
 6 | # bin/test_mwdlp -- confirm which binary this script is meant to drive.
 7 | 
 8 | #in_dir=wav_8kHz
 9 | #in_dir=wav_16kHz
10 | in_dir=wav_24kHz
11 | #out_dir=wav_anasyn_8kHz
12 | #out_dir=wav_anasyn_16kHz
13 | out_dir=wav_anasyn_24kHz
14 | 
15 | mkdir -p "$out_dir"
16 | 
17 | # Glob directly instead of parsing `ls` through a temporary list file, so file
18 | # names containing spaces survive and no tmp_anasyn.list is left behind.
19 | for line in "${in_dir}"/*.wav; do
20 |     [ -e "$line" ] || continue   # an unmatched glob stays literal: skip it
21 |     name=$(basename "$line")
22 |     echo "$line $name"
23 |     ./bin/test_mwdlp "$line" "${out_dir}/$name"
24 |     #./bin/test_mwdlp.exe "$line" "${out_dir}/$name"
25 | done
29 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime_mid/demo_interp.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Runs ./bin/test_cycvae_mwdlp once per (x, y) pair of the grid below, passing
 4 | # the pair through -c and writing one output wav per pair into $out_dir.
 5 | # NOTE(review): x_coords skips -0.35 while y_coords lists it -- confirm intended.
 6 | x_coords=(0.00 -0.05 -0.10 -0.15 -0.20 -0.25 -0.30 -0.40)
 7 | y_coords=(0.00 -0.05 -0.10 -0.15 -0.20 -0.25 -0.30 -0.35 -0.40 -0.45)
 8 | 
 9 | file_idx=001
10 | src_spk=p326
11 | in_dir=wav
12 | out_dir=wav_cv_interp
13 | src_wav=${in_dir}/${file_idx}_${src_spk}.wav
14 | 
15 | mkdir -p $out_dir
16 | 
17 | for x in ${x_coords[@]}; do for y in ${y_coords[@]}; do
18 |     echo $file_idx $src_spk to $x $y
19 |     ./bin/test_cycvae_mwdlp -c $x $y $src_wav ${out_dir}/${file_idx}_${src_spk}-interpolate_${x}_${y}.wav
20 | done; done   # for a .exe build, call ./bin/test_cycvae_mwdlp.exe with the same arguments
21 | 
22 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime_mid/demo_melsp.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Runs ./bin/test_mwdlp three times on a single utterance:
 4 | #   -o <melsp.bin> <melsp.txt> <in.wav> <out.wav>
 5 | #   -b <melsp.bin> <out.wav>
 6 | #   -t <melsp.txt> <out.wav>
 7 | # NOTE(review): presumably -o dumps the mel-spectrogram in binary and text form
 8 | # during analysis-synthesis, and -b / -t resynthesize from those dumps -- confirm.
 9 | # For a .exe build, call ./bin/test_mwdlp.exe with the same arguments.
10 | 
11 | in_dir=wav
12 | out_dir=wav_melsp
13 | 
14 | mkdir -p $out_dir
15 | 
16 | line=${in_dir}/001_p326.wav
17 | name=$(basename $line .wav)
18 | stem=${out_dir}/${name}
19 | 
20 | ./bin/test_mwdlp -o ${stem}_melsp.bin ${stem}_melsp.txt $line ${stem}_anasyn.wav
21 | ./bin/test_mwdlp -b ${stem}_melsp.bin ${stem}_binsyn.wav
22 | ./bin/test_mwdlp -t ${stem}_melsp.txt ${stem}_txtsyn.wav
39 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime_mid/demo_point.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Runs ./bin/test_cycvae_mwdlp once per entry of spks on one source utterance,
 4 | # passing the entry's 1-based position in the list through -i.
 5 | spks=(SEF1 SEF2 SEM1 SEM2 TFM1 TGM1 TMM1 TEF1 TEM1 TEF2 TEM2 TFF1 TGF1 TMF1)
 6 | 
 7 | file_idx=E10061
 8 | src_spk=SEF2
 9 | 
10 | in_dir=wav
11 | out_dir=wav_cv_point
12 | 
13 | mkdir -p $out_dir
14 | 
15 | for pos in ${!spks[@]}; do
16 |     spk=${spks[$pos]}
17 |     spk_idx=$(( pos + 1 ))
18 |     echo $file_idx $src_spk to $spk $spk_idx
19 |     ./bin/test_cycvae_mwdlp -i $spk_idx ${in_dir}/${file_idx}_${src_spk}.wav ${out_dir}/${spk_idx}_${file_idx}_${src_spk}-${spk}.wav
20 |     #./bin/test_cycvae_mwdlp.exe -i $spk_idx ${in_dir}/${file_idx}_${src_spk}.wav ${out_dir}/${spk_idx}_${file_idx}_${src_spk}-${spk}.wav
21 | done
22 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime_mid/inc/common.h:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #ifndef COMMON_H
 4 | #define COMMON_H
 5 | 
 6 | #include <stdlib.h>
 7 | #include <string.h>
 8 | #include <math.h>
 9 | 
10 | #define RNN_INLINE inline
11 | #define OPUS_INLINE inline
12 | 
13 | #define LOG256 5.5451774445f /* ln(256) */
14 | static RNN_INLINE float log2_approx(float x) /* fast log2(x) approximation */
15 | {
16 |    int integer;
17 |    float frac;
18 |    union {
19 |       float f;
20 |       int i;
21 |    } in; /* lets the float's bit pattern be read and edited as an integer */
22 |    in.f = x;
23 |    integer = (in.i>>23)-127; /* unbiased exponent taken from the float's bits */
24 |    /* remove that exponent from the bits; done in unsigned since << on a negative int is undefined */
25 |    in.i = (int)((unsigned)in.i - ((unsigned)integer<<23));
26 |    frac = in.f - 1.5f;
27 |    frac = -0.41445418f + frac*(0.95909232f
28 |           + frac*(-0.33951290f + frac*0.16541097f));
29 |    return 1+integer+frac;
30 | }
31 | 
32 | #define log_approx(x) (0.69315f*log2_approx(x)) /* ln(x) = ln(2)*log2(x) */
33 | 
34 | /** Copy n elements from src to dst with memcpy (regions must not overlap). The 0*((dst)-(src)) term adds nothing to the size; it only makes the compiler check that both pointer types are compatible */
35 | #ifndef OVERRIDE_RNN_COPY
36 | #define RNN_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
37 | #endif
38 | 
39 | /** Copy n elements from src to dst with memmove, allowing overlapping regions. The 0*((dst)-(src))
40 |     term adds nothing to the size; it only makes the compiler check that both pointer types are compatible */
41 | #ifndef OVERRIDE_RNN_MOVE
42 | #define RNN_MOVE(dst, src, n) (memmove((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
43 | #endif
44 | 
45 | /** Set n elements of dst to zero (all bytes cleared with memset) */
46 | #ifndef OVERRIDE_RNN_CLEAR
47 | #define RNN_CLEAR(dst, n) (memset((dst), 0, (n)*sizeof(*(dst))))
48 | #endif
49 | 
50 | 
51 | 
52 | #endif
53 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime_mid/inc/freq.h:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2017-2018 Mozilla */
 2 | /*
 3 |    Redistribution and use in source and binary forms, with or without
 4 |    modification, are permitted provided that the following conditions
 5 |    are met:
 6 | 
 7 |    - Redistributions of source code must retain the above copyright
 8 |    notice, this list of conditions and the following disclaimer.
 9 | 
10 |    - Redistributions in binary form must reproduce the above copyright
11 |    notice, this list of conditions and the following disclaimer in the
12 |    documentation and/or other materials provided with the distribution.
13 | 
14 |    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 |    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 |    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
18 |    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 |    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 |    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 |    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
22 |    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 |    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 |    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | */
26 | /* Modified by Patrick Lumban Tobing (Nagoya University) on Dec. 2020,
27 |    marked by PLT_Dec20 */
28 | 
29 | #ifndef FREQ_H
30 | #define FREQ_H
31 | 
32 | #include "freq_conf.h"
33 | #include "kiss_fft.h"
34 | 
35 | //PLT_Dec20
36 | /*
37 |     Define these in freq_conf.h (MEL_DIM, used below, must be defined as well)
38 |     SAMPLING_FREQUENCY 16000 //fs
39 |     FRAME_SHIFT 80 //int((fs/1000)*shiftms); shiftms = 5 ms
40 |     WINDOW_LENGTH 440 //int((fs/1000)*winms); winms = 27.5 ms
41 |     FFT_LENGTH 1024 //fs=8kHz-16kHz: 1024; 22.05kHz-24kHz: 2048; 44.1kHz-48kHz: 4096
42 |     HPASS_FILT_TAPS 1023 //order+1, has to be odd because high-pass filter passes nyq. freq.
43 | */
44 | 
45 | #define WINDOW_LENGTH_1 (WINDOW_LENGTH - 1)
46 | #define WINDOW_LENGTH_2 (WINDOW_LENGTH_1 - 1) //for indexing right side window buffer
47 | 
48 | #define HALF_WINDOW_LENGTH_1 (WINDOW_LENGTH_1 / 2) //does not include 1st [0] and (1+((N-1)/2)+((N-1)%2))th [1] if (N-1)%2 == 1
49 | 
50 | #define WIN_PAD (FFT_LENGTH - WINDOW_LENGTH) //window is centered on total FFT length
51 | 
52 | #define WIN_PAD_LEFT (WIN_PAD / 2)
53 | #define WIN_PAD_RIGHT (WIN_PAD_LEFT + (WIN_PAD % 2)) //right pad is one more than left pad if total pad is odd
54 | 
55 | #define HALF_FFT_LENGTH (FFT_LENGTH / 2)
56 | 
57 | #define LEFT_SAMPLES (HALF_FFT_LENGTH - WIN_PAD_LEFT) //samples at left-side window / reflected samples at the left edge
58 | #define RIGHT_SAMPLES (HALF_FFT_LENGTH - WIN_PAD_RIGHT) //samples at right-side window / reflected samples at the right edge
59 | 
60 | #define LEFT_SAMPLES_1 (LEFT_SAMPLES - 1) //for indexing first frame samples
61 | #define LEFT_SAMPLES_2 (LEFT_SAMPLES_1 - 1) //for indexing first frame reflected samples
62 | #define RIGHT_SAMPLES_1 (RIGHT_SAMPLES - 1) //for indexing first frame samples
63 | 
64 | #define WIN_LEFT_IDX (WIN_PAD_LEFT + 1) //0->439, index of centered 1st in total FFT-length, exclude first sample (+1) [0 coefficient]
65 | #define WIN_RIGHT_IDX (WIN_LEFT_IDX - 1 + WINDOW_LENGTH - 1) //0->439, index of centered 440th in total FFT-length
66 | 
67 | #define BUFFER_LENGTH (WINDOW_LENGTH_1 - FRAME_SHIFT) //store samples for proceeding frame
68 | 
69 | #define HPASS_FILT_TAPS_1 (HPASS_FILT_TAPS - 1)
70 | 
71 | #define MAGSP_DIM (HALF_FFT_LENGTH + 1)
72 | #define MELSP_MAGSP_DIM (MAGSP_DIM * MEL_DIM)
73 | 
74 | //PLT_Dec20
75 | typedef struct {
76 |     kiss_fft_state *kfft;
77 |     float hpass_filt[HPASS_FILT_TAPS];
78 |     float half_window[HALF_WINDOW_LENGTH_1];
79 |     float samples_hpass[HPASS_FILT_TAPS];
80 |     float samples_win[WINDOW_LENGTH_1]; //exclude first sample because of coefficient 0
81 |     kiss_fft_cpx in_fft[FFT_LENGTH]; //initialized with zeros, fill in only centered window_length
82 |     kiss_fft_cpx out_fft[FFT_LENGTH];
83 |     float magsp[MAGSP_DIM];
84 |     float melfb[MELSP_MAGSP_DIM];
85 | } DSPState;
86 | 
87 | int dspstate_get_size();
88 | DSPState *dspstate_create();
89 | void dspstate_destroy(DSPState *dsp);
90 | 
91 | void shift_apply_hpassfilt(DSPState *dsp, float *x);
92 | 
93 | void apply_window(DSPState *dsp);
94 | 
95 | void shift_apply_window(DSPState *dsp, const float *x);
96 | 
97 | void mel_spec_extract(DSPState *dsp, float *melsp);
98 | 
99 | #endif /* FREQ_H */


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime_mid/inc/mwdlp10net_cycvae.h:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2018 Mozilla */
 2 | /*
 3 |    Redistribution and use in source and binary forms, with or without
 4 |    modification, are permitted provided that the following conditions
 5 |    are met:
 6 | 
 7 |    - Redistributions of source code must retain the above copyright
 8 |    notice, this list of conditions and the following disclaimer.
 9 | 
10 |    - Redistributions in binary form must reproduce the above copyright
11 |    notice, this list of conditions and the following disclaimer in the
12 |    documentation and/or other materials provided with the distribution.
13 | 
14 |    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 |    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 |    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
18 |    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 |    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 |    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 |    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
22 |    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 |    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 |    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | */
26 | /* Modified by Patrick Lumban Tobing (Nagoya University) on Dec. 2020 - Aug. 2021,
27 |    marked by PLT_<MonthYear> */
28 | 
29 | //PLT_Dec20
30 | #ifndef _MWDLP10NET_CYCVAE_H_
31 | #define _MWDLP10NET_CYCVAE_H_
32 | 
33 | #ifndef MWDLP10NET_CYCVAE_EXPORT
34 | # if defined(WIN32)
35 | #  if defined(MWDLP10NET_CYCVAE_BUILD) && defined(DLL_EXPORT)
36 | #   define MWDLP10NET_CYCVAE_EXPORT __declspec(dllexport)
37 | #  else
38 | #   define MWDLP10NET_CYCVAE_EXPORT
39 | #  endif
40 | # elif defined(__GNUC__) && defined(MWDLP10NET_CYCVAE_BUILD)
41 | #  define MWDLP10NET_CYCVAE_EXPORT __attribute__ ((visibility ("default")))
42 | # else
43 | #  define MWDLP10NET_CYCVAE_EXPORT
44 | # endif
45 | #endif
46 | 
47 | 
48 | //PLT_Dec20: state types are only forward-declared in this header, so code including it handles them through pointers
49 | typedef struct MWDLP10CycleVAEMelspExcitSpkNetState MWDLP10CycleVAEMelspExcitSpkNetState;
50 | 
51 | typedef struct MWDLP10NetState MWDLP10NetState;
52 | 
53 | MWDLP10NET_CYCVAE_EXPORT int mwdlp10cyclevaenet_get_size();
54 | 
55 | MWDLP10NET_CYCVAE_EXPORT MWDLP10CycleVAEMelspExcitSpkNetState *mwdlp10cyclevaenet_create();
56 | 
57 | MWDLP10NET_CYCVAE_EXPORT void mwdlp10cyclevaenet_destroy(MWDLP10CycleVAEMelspExcitSpkNetState *mwdlp10cyclevaenet);
58 | 
59 | MWDLP10NET_CYCVAE_EXPORT int mwdlp10net_get_size();
60 | 
61 | MWDLP10NET_CYCVAE_EXPORT MWDLP10NetState *mwdlp10net_create();
62 | 
63 | MWDLP10NET_CYCVAE_EXPORT void mwdlp10net_destroy(MWDLP10NetState *mwdlp10net);
64 | 
65 | //PLT_Jul21
66 | MWDLP10NET_CYCVAE_EXPORT void cyclevae_melsp_excit_spk_convert_mwdlp10net_synthesize(
67 |     MWDLP10CycleVAEMelspExcitSpkNetState *st, float *features, float *spk_code_aux,
68 |         short *output, int *n_output, int flag_last_frame);
69 |         //NOTE(review): parameter roles are not documented here; presumably each call consumes one frame of features and reports the number of samples written to output via *n_output -- confirm against mwdlp10net_cycvae.c
70 | 
71 | MWDLP10NET_CYCVAE_EXPORT void mwdlp10net_synthesize(MWDLP10NetState *st, const float *features,
72 |     short *output, int *n_output, int flag_last_frame);
73 |     //NOTE(review): flag_last_frame is presumably non-zero only for the final frame of an utterance -- TODO confirm
74 | 
75 | //PLT_Jul21
76 | MWDLP10NET_CYCVAE_EXPORT void cyclevae_melsp_excit_spk_convert_mwdlp10net_synthesize_nodlpc(
77 |     MWDLP10CycleVAEMelspExcitSpkNetState *st, float *features, float *spk_code_aux,
78 |         short *output, int *n_output, int flag_last_frame);
79 |         //same parameter list as cyclevae_melsp_excit_spk_convert_mwdlp10net_synthesize above
80 | 
81 | MWDLP10NET_CYCVAE_EXPORT void mwdlp10net_synthesize_nodlpc(MWDLP10NetState *st, const float *features,
82 |     short *output, int *n_output, int flag_last_frame);
83 | 
84 | #endif
85 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime_mid/inc/opus_types.h:
--------------------------------------------------------------------------------
  1 | /* (C) COPYRIGHT 1994-2002 Xiph.Org Foundation */
  2 | /* Modified by Jean-Marc Valin */
  3 | /*
  4 |    Redistribution and use in source and binary forms, with or without
  5 |    modification, are permitted provided that the following conditions
  6 |    are met:
  7 | 
  8 |    - Redistributions of source code must retain the above copyright
  9 |    notice, this list of conditions and the following disclaimer.
 10 | 
 11 |    - Redistributions in binary form must reproduce the above copyright
 12 |    notice, this list of conditions and the following disclaimer in the
 13 |    documentation and/or other materials provided with the distribution.
 14 | 
 15 |    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 16 |    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 17 |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 18 |    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 19 |    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 20 |    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 21 |    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 22 |    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 23 |    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 24 |    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 25 |    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 26 | */
 27 | /* opus_types.h based on ogg_types.h from libogg */
 28 | 
 29 | /**
 30 |    @file opus_types.h
 31 |    @brief Opus reference implementation types
 32 | */
 33 | #ifndef OPUS_TYPES_H
 34 | #define OPUS_TYPES_H
 35 | 
 36 | /* Use the real stdint.h if it's there: C99 or later, GCC with stdint.h already included, or HAVE_STDINT_H defined (test taken from Paul Hsieh's pstdint.h) */
 37 | #if (defined(__STDC__) && __STDC__ && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_)) || defined (HAVE_STDINT_H))
 38 | #include <stdint.h>
 39 | 
 40 |    typedef int16_t opus_int16;
 41 |    typedef uint16_t opus_uint16;
 42 |    typedef int32_t opus_int32;
 43 |    typedef uint32_t opus_uint32;
 44 | #elif defined(_WIN32)
 45 | 
 46 | #  if defined(__CYGWIN__)
 47 | #    include <_G_config.h>
 48 |      typedef _G_int32_t opus_int32;
 49 |      typedef _G_uint32_t opus_uint32;
 50 |      typedef _G_int16 opus_int16; /* NOTE(review): no _t suffix, unlike _G_int32_t above -- confirm against <_G_config.h> */
 51 |      typedef _G_uint16 opus_uint16;
 52 | #  elif defined(__MINGW32__)
 53 |      typedef short opus_int16;
 54 |      typedef unsigned short opus_uint16;
 55 |      typedef int opus_int32;
 56 |      typedef unsigned int opus_uint32;
 57 | #  elif defined(__MWERKS__)
 58 |      typedef int opus_int32;
 59 |      typedef unsigned int opus_uint32;
 60 |      typedef short opus_int16;
 61 |      typedef unsigned short opus_uint16;
 62 | #  else
 63 |      /* MSVC/Borland */
 64 |      typedef __int32 opus_int32;
 65 |      typedef unsigned __int32 opus_uint32;
 66 |      typedef __int16 opus_int16;
 67 |      typedef unsigned __int16 opus_uint16;
 68 | #  endif
 69 | 
 70 | #elif defined(__MACOS__)
 71 | 
 72 | #  include <sys/types.h>
 73 |    typedef SInt16 opus_int16;
 74 |    typedef UInt16 opus_uint16;
 75 |    typedef SInt32 opus_int32;
 76 |    typedef UInt32 opus_uint32;
 77 | 
 78 | #elif (defined(__APPLE__) && defined(__MACH__)) /* MacOS X Framework build */
 79 | 
 80 | #  include <sys/types.h>
 81 |    typedef int16_t opus_int16;
 82 |    typedef u_int16_t opus_uint16;
 83 |    typedef int32_t opus_int32;
 84 |    typedef u_int32_t opus_uint32;
 85 | 
 86 | #elif defined(__BEOS__)
 87 | 
 88 |    /* Be */
 89 | #  include <inttypes.h>
 90 |    typedef int16 opus_int16;
 91 |    typedef u_int16 opus_uint16;
 92 |    typedef int32_t opus_int32;
 93 |    typedef u_int32_t opus_uint32;
 94 | 
 95 | #elif defined (__EMX__)
 96 | 
 97 |    /* OS/2 GCC */
 98 |    typedef short opus_int16;
 99 |    typedef unsigned short opus_uint16;
100 |    typedef int opus_int32;
101 |    typedef unsigned int opus_uint32;
102 | 
103 | #elif defined (DJGPP)
104 | 
105 |    /* DJGPP */
106 |    typedef short opus_int16;
107 |    typedef unsigned short opus_uint16;
108 |    typedef int opus_int32;
109 |    typedef unsigned int opus_uint32;
110 | 
111 | #elif defined(R5900)
112 | 
113 |    /* PS2 EE */
114 |    typedef int opus_int32;
115 |    typedef unsigned opus_uint32;
116 |    typedef short opus_int16;
117 |    typedef unsigned short opus_uint16;
118 | 
119 | #elif defined(__SYMBIAN32__)
120 | 
121 |    /* Symbian GCC */
122 |    typedef signed short opus_int16;
123 |    typedef unsigned short opus_uint16;
124 |    typedef signed int opus_int32;
125 |    typedef unsigned int opus_uint32;
126 | 
127 | #elif defined(CONFIG_TI_C54X) || defined (CONFIG_TI_C55X)
128 | 
129 |    typedef short opus_int16;
130 |    typedef unsigned short opus_uint16;
131 |    typedef long opus_int32;
132 |    typedef unsigned long opus_uint32;
133 | 
134 | #elif defined(CONFIG_TI_C6X)
135 | 
136 |    typedef short opus_int16;
137 |    typedef unsigned short opus_uint16;
138 |    typedef int opus_int32;
139 |    typedef unsigned int opus_uint32;
140 | 
141 | #else
142 | 
143 |    /* Give up, take a reasonable guess: assumes short is 16 bits and int is 32 bits */
144 |    typedef short opus_int16;
145 |    typedef unsigned short opus_uint16;
146 |    typedef int opus_int32;
147 |    typedef unsigned int opus_uint32;
148 | 
149 | #endif
150 | 
151 | #define opus_int         int                     /* used for counters etc; at least 16 bits */
152 | #define opus_int64       long long
153 | #define opus_int8        signed char
154 | 
155 | #define opus_uint        unsigned int            /* used for counters etc; at least 16 bits */
156 | #define opus_uint64      unsigned long long
157 | #define opus_uint8       unsigned char
158 | 
159 | #endif  /* OPUS_TYPES_H */
160 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime_mid/inc/tansig_table.h:
--------------------------------------------------------------------------------
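 1 | /* This file is auto-generated by gen_tables */
 2 | /* tansig_table[i] holds tanh(0.04*i) for i = 0..200, i.e. tanh sampled on [0, 8] in steps of 0.04. */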
 3 | static const float tansig_table[201] = {
 4 | 0.000000f, 0.039979f, 0.079830f, 0.119427f, 0.158649f,
 5 | 0.197375f, 0.235496f, 0.272905f, 0.309507f, 0.345214f,
 6 | 0.379949f, 0.413644f, 0.446244f, 0.477700f, 0.507977f,
 7 | 0.537050f, 0.564900f, 0.591519f, 0.616909f, 0.641077f,
 8 | 0.664037f, 0.685809f, 0.706419f, 0.725897f, 0.744277f,
 9 | 0.761594f, 0.777888f, 0.793199f, 0.807569f, 0.821040f,
10 | 0.833655f, 0.845456f, 0.856485f, 0.866784f, 0.876393f,
11 | 0.885352f, 0.893698f, 0.901468f, 0.908698f, 0.915420f,
12 | 0.921669f, 0.927473f, 0.932862f, 0.937863f, 0.942503f,
13 | 0.946806f, 0.950795f, 0.954492f, 0.957917f, 0.961090f,
14 | 0.964028f, 0.966747f, 0.969265f, 0.971594f, 0.973749f,
15 | 0.975743f, 0.977587f, 0.979293f, 0.980869f, 0.982327f,
16 | 0.983675f, 0.984921f, 0.986072f, 0.987136f, 0.988119f,
17 | 0.989027f, 0.989867f, 0.990642f, 0.991359f, 0.992020f,
18 | 0.992631f, 0.993196f, 0.993718f, 0.994199f, 0.994644f,
19 | 0.995055f, 0.995434f, 0.995784f, 0.996108f, 0.996407f,
20 | 0.996682f, 0.996937f, 0.997172f, 0.997389f, 0.997590f,
21 | 0.997775f, 0.997946f, 0.998104f, 0.998249f, 0.998384f,
22 | 0.998508f, 0.998623f, 0.998728f, 0.998826f, 0.998916f,
23 | 0.999000f, 0.999076f, 0.999147f, 0.999213f, 0.999273f,
24 | 0.999329f, 0.999381f, 0.999428f, 0.999472f, 0.999513f,
25 | 0.999550f, 0.999585f, 0.999617f, 0.999646f, 0.999673f,
26 | 0.999699f, 0.999722f, 0.999743f, 0.999763f, 0.999781f,
27 | 0.999798f, 0.999813f, 0.999828f, 0.999841f, 0.999853f,
28 | 0.999865f, 0.999875f, 0.999885f, 0.999893f, 0.999902f,
29 | 0.999909f, 0.999916f, 0.999923f, 0.999929f, 0.999934f,
30 | 0.999939f, 0.999944f, 0.999948f, 0.999952f, 0.999956f,
31 | 0.999959f, 0.999962f, 0.999965f, 0.999968f, 0.999970f,
32 | 0.999973f, 0.999975f, 0.999977f, 0.999978f, 0.999980f,
33 | 0.999982f, 0.999983f, 0.999984f, 0.999986f, 0.999987f,
34 | 0.999988f, 0.999989f, 0.999990f, 0.999990f, 0.999991f,
35 | 0.999992f, 0.999992f, 0.999993f, 0.999994f, 0.999994f,
36 | 0.999994f, 0.999995f, 0.999995f, 0.999996f, 0.999996f,
37 | 0.999996f, 0.999997f, 0.999997f, 0.999997f, 0.999997f,
38 | 0.999997f, 0.999998f, 0.999998f, 0.999998f, 0.999998f,
39 | 0.999998f, 0.999998f, 0.999999f, 0.999999f, 0.999999f,
40 | 0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f,
41 | 0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f,
42 | 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
43 | 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
44 | 1.000000f,
45 | };
46 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime_mid/inc/vec.h:
--------------------------------------------------------------------------------
  1 | /* Copyright (c) 2018 Mozilla
  2 |                  2008-2011 Octasic Inc.
  3 |                  2012-2017 Jean-Marc Valin */
  4 | /*
  5 |    Redistribution and use in source and binary forms, with or without
  6 |    modification, are permitted provided that the following conditions
  7 |    are met:
  8 | 
  9 |    - Redistributions of source code must retain the above copyright
 10 |    notice, this list of conditions and the following disclaimer.
 11 | 
 12 |    - Redistributions in binary form must reproduce the above copyright
 13 |    notice, this list of conditions and the following disclaimer in the
 14 |    documentation and/or other materials provided with the distribution.
 15 | 
 16 |    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 17 |    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 18 |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 19 |    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
 20 |    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 21 |    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 22 |    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 23 |    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 24 |    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 25 |    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 26 |    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 27 | */
 28 | /* No AVX2/FMA support */
 29 | /* Modified by Patrick Lumban Tobing (Nagoya University) on Sept.-Dec. 2020,
 30 |    marked by PLT_<Sep/Dec>20 */
 31 | 
 32 | #ifndef VEC_H   /* guard named after this file (vec.h); it was VEC_NEON_H, which contradicts the closing "#endif" comment */
 33 | #define VEC_H
 34 | 
 35 | //PLT_Dec20
 36 | #include "tansig_table.h"
 37 | 
 38 | static float celt_exp2(float x)   /* fast approximation of 2^x */
 39 | {
 40 |    int integer;
 41 |    float frac;
 42 |    union {
 43 |       float f;
 44 |       opus_uint32 i;
 45 |    } res;   /* gives access to the bit pattern of the float result */
 46 |    integer = (int)floor(x);
 47 |    if (integer < -50)   /* anything below 2^-50 is returned as 0 */
 48 |       return 0;
 49 |    frac = x-integer;   /* fractional part of x */
 50 |    /* K0 = 1, K1 = log(2), K2 = 3-4*log(2), K3 = 3*log(2) - 2 */
 51 |    res.f = 0.99992522f + frac * (0.69583354f
 52 |            + frac * (0.22606716f + 0.078024523f*frac));   /* cubic polynomial in frac */
 53 |    res.i = (res.i + ((opus_uint32)integer<<23)) & 0x7fffffff;   /* add integer at bit 23; shifted as unsigned because << on a negative int is undefined */
 54 |    return res.f;
 55 | }
 56 | #define celt_exp(x) celt_exp2((x)*1.44269504f)
 57 | 
 58 | static float tansig_approx(float x)   /* table-based approximation of tanh(x) */
 59 | {
 60 |     int i;
 61 |     float y, dy;
 62 |     float sign=1;
 63 |     /* Saturate beyond the table range. The tests are reversed so that NaN is caught too (it returns 1) and huge values never reach the (int) cast below. */
 64 |     if (!(x<8))
 65 |        return 1;
 66 |     if (!(x>-8))
 67 |        return -1;
 68 |     if (x<0) { x=-x; sign=-1; }   /* evaluate on |x| and restore the sign at the end */
 69 |     i = IMAX(0, IMIN(200, (int)floor(.5f+25*x)));   /* nearest table index, clamped to the 201 entries */
 70 |     x -= .04f*i;   /* residual distance from the table point */
 71 |     y = tansig_table[i];
 72 |     dy = 1-y*y;   /* derivative of tanh at the table point */
 73 |     y = y + x*dy*(1 - y*x);   /* second-order correction around the table point */
 74 |     return sign*y;
 75 | }
 76 | 
 77 | static OPUS_INLINE float sigmoid_approx(float x)
 78 | {
 79 |    return .5f + .5f*tansig_approx(.5f*x);   /* sigmoid(x) = (1 + tanh(x/2))/2, evaluated through tansig_approx */
 80 | }
 81 | 
 82 | static void softmax(float *y, const float *x, int N)
 83 | {
 84 |     // NOTE: this only computes y[i] = celt_exp(x[i]); nothing here divides by the sum, so the outputs are unnormalised.
 85 |     for (int i=0;i<N;i++)
 86 |         y[i] = celt_exp(x[i]);
 87 | }
 88 | 
 89 | static void vec_exp(float *y, const float *x, int N)
 90 | {
 91 |     /* Element-wise exponential via libm: y[k] = exp(x[k]) for k in [0, N). */
 92 |     for (int k=0;k<N;++k)
 93 |         y[k] = exp(x[k]);
 94 | }
 95 | 
 96 | static void vec_tanh(float *y, const float *x, int N)
 97 | {
 98 |     /* Element-wise tanh through the table-based tansig_approx(). */
 99 |     for (int k=0;k<N;++k)
100 |     {
101 |         y[k] = tansig_approx(x[k]);
102 |     }
103 | }
104 | 
105 | //PLT_Feb21
106 | static void vec_tanh_exp(float *y, const float *x, int N)
107 | {
108 |     /* Element-wise tanh evaluated with libm instead of the lookup table. */
109 |     /* The (e^2x - 1)/(e^2x + 1) form is avoided: once exp(2x) overflows a float (x > ~44) it evaluates to inf/inf = NaN. */
110 |     for (int i=0;i<N;i++)
111 |     {
112 |         /* tanh() saturates to +/-1 for large |x|. */
113 |         /* It is computed in double and narrowed on assignment. */
114 |         y[i] = tanh(x[i]);
115 |     }
116 | }
117 | 
118 | //PLT_Sep20
119 | static void vec_tanhshrink(float *y, const float *x, int N)
120 | {
121 |     /* Element-wise tanhshrink: y[i] = x[i] - tanh(x[i]), with tanh taken from libm. */
122 |     /* The (e^2x - 1)/(e^2x + 1) form is avoided: once exp(2x) overflows a float (x > ~44) it evaluates to inf/inf = NaN. */
123 |     for (int i=0;i<N;i++)
124 |     {
125 |         /* tanh() saturates to +/-1, so large inputs give x -/+ 1. */
126 |         /* The subtraction is done in double and narrowed on assignment. */
127 |         y[i] = x[i]-tanh(x[i]);
128 |     }
129 | }
130 | 
131 | static void vec_sigmoid(float *y, const float *x, int N)
132 | {
133 |     /* Element-wise sigmoid through the table-based sigmoid_approx(). */
134 |     for (int k=0;k<N;++k)
135 |     {
136 |         y[k] = sigmoid_approx(x[k]);
137 |     }
138 | }
139 | 
140 | //PLT_Feb21
141 | static void vec_sigmoid_exp(float *y, const float *x, int N)
142 | {
143 |     /* Element-wise logistic sigmoid evaluated with libm instead of the lookup table. */
144 |     /* Written as 1/(1+exp(-x)): the ex/(ex+1) form is inf/inf = NaN once exp(x) overflows a float (x > ~88). */
145 |     for (int i=0;i<N;i++)
146 |     {
147 |         /* If exp(-x) overflows to inf (very negative x) the result is 1/inf = 0; */
148 |         /* if it underflows to 0 (very positive x) the result is 1/1 = 1. */
149 |         y[i] = 1/(1+exp(-x[i]));
150 |     }
151 | }
152 | 
153 | static void sgemv_accum16(float *out, const float *weights, int rows, int cols, int col_stride, const float *x)   /* out[i] += sum over j of weights[j*col_stride + i] * x[j] */
154 | {
155 |    int i, j;
156 |    for (i=0;i<rows;i+=16)   /* 16 output rows per pass; NOTE: all 16 are always touched, so rows is expected to be a multiple of 16 (not checked here) */
157 |    {
158 |       for (j=0;j<cols;j++)
159 |       {
160 |          const float * restrict w;
161 |          float * restrict y;
162 |          float xj;
163 |          w = &weights[j*col_stride + i];   /* column j starts at j*col_stride; +i selects the current 16-row block */
164 |          xj = x[j];
165 |          y = &out[i];
166 |          y[0] += w[0]*xj;   /* accumulates into out: the caller provides the initial values */
167 |          y[1] += w[1]*xj;
168 |          y[2] += w[2]*xj;
169 |          y[3] += w[3]*xj;
170 |          y[4] += w[4]*xj;
171 |          y[5] += w[5]*xj;
172 |          y[6] += w[6]*xj;
173 |          y[7] += w[7]*xj;
174 |          y[8] += w[8]*xj;
175 |          y[9] += w[9]*xj;
176 |          y[10] += w[10]*xj;
177 |          y[11] += w[11]*xj;
178 |          y[12] += w[12]*xj;
179 |          y[13] += w[13]*xj;
180 |          y[14] += w[14]*xj;
181 |          y[15] += w[15]*xj;
182 |       }
183 |    }
184 | }
185 | 
186 | static void sparse_sgemv_accum16(float *out, const float *w, int rows, const int *idx, const float *x)   /* sparse variant: only the columns listed in idx contribute; idx and w are consumed sequentially */
187 | {
188 |    int i, j;
189 |    for (i=0;i<rows;i+=16)   /* 16 output rows per pass; all 16 are always touched, so rows is expected to be a multiple of 16 (not checked here) */
190 |    {
191 |       int cols;
192 |       cols = *idx++;   /* idx first gives the number of stored columns for this 16-row block ... */
193 |       for (j=0;j<cols;j++)
194 |       {
195 |          float * restrict y;
196 |          float xj;
197 |          xj = x[*idx++];   /* ... followed by that many indices into x */
198 |          y = &out[i];
199 |          y[0] += w[0]*xj;   /* accumulates into out: the caller provides the initial values */
200 |          y[1] += w[1]*xj;
201 |          y[2] += w[2]*xj;
202 |          y[3] += w[3]*xj;
203 |          y[4] += w[4]*xj;
204 |          y[5] += w[5]*xj;
205 |          y[6] += w[6]*xj;
206 |          y[7] += w[7]*xj;
207 |          y[8] += w[8]*xj;
208 |          y[9] += w[9]*xj;
209 |          y[10] += w[10]*xj;
210 |          y[11] += w[11]*xj;
211 |          y[12] += w[12]*xj;
212 |          y[13] += w[13]*xj;
213 |          y[14] += w[14]*xj;
214 |          y[15] += w[15]*xj;
215 |          w += 16;   /* weights are packed as 16 consecutive values per stored column */
216 |       }
217 |    }
218 | }
219 | 
220 | #endif /* VEC_H */
221 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/demo_realtime_mid/inc/wave.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |    Copyright 2021 Patrick Lumban Tobing (Nagoya University)
 3 |    Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
 4 | 
 5 |    WAV file read/write is based on http://truelogic.org/wordpress/2015/09/04/parsing-a-wav-file-in-c
 6 | */
 7 | 
 8 | 
 9 | // WAVE file header format (the fields add up to 44 bytes when unsigned int is 4 bytes and unsigned short is 2 bytes -- NOTE(review): not enforced here)
10 | struct HEADER {
11 |     unsigned char riff[4];                      // "RIFF" marker
12 |     unsigned int overall_size;               // overall size of file in bytes
13 |     unsigned char wave[4];                      // "WAVE" marker
14 |     unsigned char fmt_chunk_marker[4];          // "fmt " marker (4 characters, the last one is a space)
15 |     unsigned int length_of_fmt;                 // length of the format data
16 |     unsigned short format_type;                   // format type: 1 - PCM, 3 - IEEE float, 6 - 8-bit A-law, 7 - 8-bit mu-law
17 |     unsigned short channels;                      // number of channels
18 |     unsigned int sample_rate;                   // sampling rate (blocks per second)
19 |     unsigned int byterate;                      // SampleRate * NumChannels * BitsPerSample/8
20 |     unsigned short block_align;                   // NumChannels * BitsPerSample/8
21 |     unsigned short bits_per_sample;               // bits per sample: 8 - 8 bits, 16 - 16 bits, etc.
22 |     unsigned char data_chunk_header [4];        // "data" marker or "FLLR" marker
23 |     unsigned int data_size;                     // NumSamples * NumChannels * BitsPerSample/8 - size of the next chunk that will be read
24 | };
25 | 
26 | /*
27 |     Positions   Sample Value    Description
28 |     1 – 4   “RIFF”  Marks the file as a riff file. Characters are each 1 byte long.
29 |     5 – 8   File size (integer)     Size of the overall file – 8 bytes, in bytes (32-bit integer). Typically, you’d fill this in after creation.
30 |     9 -12   “WAVE”  File Type Header. For our purposes, it always equals “WAVE”.
31 |     13-16   “fmt “  Format chunk marker. Includes a trailing space
32 |     17-20   16  Length of format data as listed above
33 |     21-22   1   Type of format (1 is PCM) – 2 byte integer
34 |     23-24   2   Number of Channels – 2 byte integer
35 |     25-28   44100   Sample Rate – 32-bit integer. Common values are 44100 (CD), 48000 (DAT). Sample Rate = Number of Samples per second, or Hertz.
36 |     29-32   176400  (Sample Rate * BitsPerSample * Channels) / 8.
37 |     33-34   4   (BitsPerSample * Channels) / 8.1 – 8 bit mono2 – 8 bit stereo/16 bit mono4 – 16 bit stereo
38 |     35-36   16  Bits per sample
39 |     37-40   “data”  “data” chunk header. Marks the beginning of the data section.
40 |     41-44   File size (data)    Size of the data section.
41 |     Sample values are given above for a 16-bit stereo source.
42 | 
43 |     It is important to note that the WAV format stores multi-byte values in little-endian order [LSB at the smallest address],
44 |     so on a big-endian host the bytes must be swapped (or each value assembled byte by byte, LSB first) for the values to make sense.
45 | */
46 | 
47 | short read_write_wav(FILE *fin, FILE *fout, short *num_reflected_right_edge_samples, long *num_samples, long *size_of_each_sample);
48 | long read_feat_write_wav(FILE* fin, FILE* fout, int bin_flag);
49 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/download_vcc20.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright 2021 Patrick Lumban Tobing (Nagoya University)
 4 | #  Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
 5 | 
 6 | #http://vc-challenge.org/
 7 | #https://github.com/nii-yamagishilab/VCC2020-database
 8 | 
 9 | wget https://github.com/nii-yamagishilab/VCC2020-database/raw/master/vcc2020_database_training_source.zip
10 | wget https://github.com/nii-yamagishilab/VCC2020-database/raw/master/vcc2020_database_training_target_task1.zip
11 | wget https://github.com/nii-yamagishilab/VCC2020-database/raw/master/vcc2020_database_training_target_task2.zip
12 | wget https://github.com/nii-yamagishilab/VCC2020-database/raw/master/vcc2020_database_evaluation.zip
13 | wget https://github.com/nii-yamagishilab/VCC2020-database/raw/master/vcc2020_database_groundtruth.zip
14 | 
15 | unzip vcc2020_database_training_source.zip
16 | rm -vf vcc2020_database_training_source.zip
17 | unzip vcc2020_database_training_target_task1.zip
18 | rm -vf vcc2020_database_training_target_task1.zip
19 | unzip vcc2020_database_training_target_task2.zip
20 | rm -vf vcc2020_database_training_target_task2.zip
21 | unzip vcc2020_database_evaluation.zip
22 | rm -vf vcc2020_database_evaluation.zip
23 | unzip vcc2020_database_groundtruth.zip
24 | rm -vf vcc2020_database_groundtruth.zip
25 | 
26 | rm -vfr __MACOSX
27 | 
28 | trg_dir=wav_24kHz
29 | 
30 | mkdir -p ${trg_dir}
31 | 
32 | mv -v source/S* ${trg_dir}
33 | mv -v target_task1/T* ${trg_dir}
34 | mv -v target_task2/T* ${trg_dir}
35 | 
36 | mkdir -p ${trg_dir}/test
37 | 
38 | mv -v vcc2020_database_evaluation/S* ${trg_dir}/test
39 | mv -v vcc2020_database_groundtruth/T* ${trg_dir}/test
40 | 
41 | rm -vfr source
42 | rm -vfr target_task1
43 | rm -vfr target_task2
44 | rm -vfr vcc2020_database_evaluation
45 | rm -vfr vcc2020_database_groundtruth
46 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/get_spk_list.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Prints the entries of wav_24kHz/ on one line, followed by their count.
 4 | # The listing is captured directly, so no scratch file named "tmp" is
 5 | # created (and then deleted) in the current directory.
 6 | 
 7 | #a=(`ls wav_24kHz/train`)
 8 | a=(`ls wav_24kHz`)
 9 | 
10 | echo ${a[@]}
11 | echo ${#a[@]}
12 | 
13 | #a=(`ls wav_24kHz_unseen/test`)
14 | #
15 | #echo ${a[@]}
16 | #echo ${#a[@]}
17 | 


--------------------------------------------------------------------------------
/egs/cycvae_mwdlp_vcc20/path.sh:
--------------------------------------------------------------------------------
1 | # Environment setup for this recipe (no shebang: presumably meant to be sourced by the run scripts).
2 | # The ${VAR:+...} form keeps LD_LIBRARY_PATH free of a trailing ':' when it was previously unset; an empty entry would make the loader search the current directory.
3 | export LD_LIBRARY_PATH=/usr/local/cuda-11/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
4 | export CUDA_HOME=/usr/local/cuda-11
5 | export PRJ_ROOT=../..
6 | source $PRJ_ROOT/tools/venv/bin/activate
7 | export PATH=$PATH:$PRJ_ROOT/src/bin:$PRJ_ROOT/src/utils
8 | export PYTHONPATH=$PRJ_ROOT/src/nets:$PRJ_ROOT/src/utils
9 | 


--------------------------------------------------------------------------------
/egs/mwdlp_vcc20/README.md:
--------------------------------------------------------------------------------
 1 | # High-fidelity, real-time, and low-latency universal neural vocoder with multiband WaveRNN using data-driven linear prediction (MWDLP)
 2 | 
 3 | 
 4 | This package uses the Voice Conversion Challenge 2020 dataset ([VCC20](http://vc-challenge.org/)).
 5 | 
 6 | The real-time implementation is based on [LPCNet](https://github.com/mozilla/LPCNet/).
 7 | 
 8 | 
 9 | ## Data preparation
10 | ```
11 | $ bash download_vcc20.sh
12 | ```
13 | 
14 | ## Data preprocessing
15 | 1. Open **run.sh**
16 | 2. Set `stage=0init123`
17 | 3. Set `n_jobs=` to the number of parallel threads to use for preprocessing
18 | 4. `$ bash run.sh`
19 | 
20 | 
21 | ## Neural vocoder training [~ 4 days]
22 | 1. Open **run.sh**
23 | 2. Set `stage=4`
24 | 3. Set a value of `GPU_device=` for GPU device selection
25 | 4. `$ bash run.sh`
26 | 
27 | 
28 | ## Compile CPU real-time program
29 | 1. Open **run_realtime.sh**
30 | 2. Set `stage=0`
31 | 3. `$ bash run_realtime.sh`
32 | 
33 | 
34 | ## Analysis-synthesis decoding
35 | 1. Open **run_realtime.sh**
36 | 2. Set `stage=1`
37 | 3. Set values in `spks_dec=` for speakers to be synthesized
38 | 4. `$ bash run_realtime.sh`
39 | 
40 | 
41 | ## Decoding with mel-spectrogram output/input
42 | 1. Open **run_realtime.sh**
43 | 2. Set `stage=2`
44 | 3. Set values in `spks_dec=` for speakers to be synthesized
45 | 4. `$ bash run_realtime.sh`
46 | 
47 | 
48 | ## Contact
49 | 
50 | Patrick Lumbantobing
51 | 
52 | patrickltobing@gmail.com
53 | 
54 | patrick.lumbantobing@g.sp.m.is.nagoya-u.ac.jp
55 | 


--------------------------------------------------------------------------------
/egs/mwdlp_vcc20/cmd.sh:
--------------------------------------------------------------------------------
 1 | # you can change cmd.sh depending on what type of queue you are using.
 2 | # If you have no queueing system and want to run on a local machine, you
 3 | # can change all instances 'queue.pl' to run.pl (but be careful and run
 4 | # commands one by one: most recipes will exhaust the memory on your
 5 | # machine).  queue.pl works with GridEngine (qsub).  slurm.pl works
 6 | # with slurm.  Different queues are configured differently, with different
 7 | # queue names and different ways of specifying things like memory;
 8 | # to account for these differences you can create and edit the file
 9 | # conf/queue.conf to match your queue's configuration.  Search for
10 | # conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
11 | # or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
12 | 
13 | # for local
14 | export train_cmd="run.pl"
15 | export cuda_cmd="run.pl --gpu 1"
16 | export max_jobs=1
17 | 
18 | # for slurm (you can change configuration file "conf/slurm.conf")
19 | # export train_cmd="slurm.pl --config conf/slurm.conf"
20 | # export cuda_cmd="slurm.pl --hpc_gpu 1 --config conf/slurm.conf"
21 | 


--------------------------------------------------------------------------------
/egs/mwdlp_vcc20/conf/config.yml:
--------------------------------------------------------------------------------
 1 | ## Sampling rate
 2 | #fs: 8000
 3 | #fs: 16000
 4 | #fs: 22050
 5 | fs: 24000
 6 | #fs: 44100
 7 | #fs: 48000
 8 | 
 9 | ## Frame-shift mel-cep/mel-spec extraction
10 | shiftms: 10
11 | 
12 | ## Window length mel-spectrogram extraction
13 | winms: 27.5
14 | 
15 | ## DC-component removal
16 | highpass_cutoff: 65
17 | 
18 | ## Mel-cepstrum
19 | mcep_dim: 49
20 | 
21 | ## Mel-spectrogram
22 | mel_dim: 80
23 | 
24 | ## Pre-emphasis noise-shaping coefficient
25 | alpha: 0.85
26 | 
27 | ## GRU hidden units wavernn
28 | ## for maximum performance, use 1184; for feasible usage use 1024
29 | hidden_units_wave: 1024
30 | #hidden_units_wave: 1184
31 | 
32 | hidden_units_wave_2: 32
33 | 
34 | ## Output dimension of FC layer before GRU for wavernn
35 | s_dim: 320
36 | 
37 | ## Output dimension of DualFC before final FC layer
38 | mid_dim: 16
39 | 
40 | ## GRU hidden layers wavernn
41 | hidden_layers_wave: 1
42 | 
43 | ## kernel-size input conv wavernn
44 | kernel_size_wave: 7
45 | dilation_size_wave: 1
46 | 
47 | ## use causal input convolution
48 | ## if using skewed input convolution for encoder (right_size > 0), set causal_conv dec/lf0 to true
49 | ## always use non-causal input conv. for encoder/wave
50 | causal_conv_wave: false
51 | ## if right_size > 0 (skewed conv encoder, i.e., future frame is limited), set causal_conv dec/lf0 to true [low-latency/real-time proc.]
52 | 
53 | ## use segmental convolution for wavernn
54 | #seg_conv_flag_wave: false
55 | seg_conv_flag_wave: true
56 | 
57 | ## learning rate
58 | lr: 1e-4
59 | 
60 | ## dropout rate
61 | do_prob: 0.5
62 | 
63 | ## maximum step count
64 | #step_count_wave: 20
65 | step_count_wave: 4350000
66 | 
67 | # number of workers (threads) for batch data handling
68 | n_workers: 1
69 | #n_workers: 2
70 | 
71 | mdl_name_wave: wavernn_dualgru_compact_lpc_mband_10bit_cf_stft_emb_v2
72 | 
73 | ## sparsification scheduling settings for wavernn
74 | #t_start: 1
75 | t_start: 20000
76 | #t_end: 20
77 | t_end: 1070000
78 | #interval: 1
79 | interval: 20
80 | densities: 0.09-0.09-0.12
81 | n_stage: 4
82 | 
83 | ## number of data-driven linear predictive coefficients (LPC) in wavernn
84 | #lpc: 0
85 | lpc: 8
86 | 


--------------------------------------------------------------------------------
/egs/mwdlp_vcc20/conf/slurm.conf:
--------------------------------------------------------------------------------
 1 | command sbatch --export=PATH  --ntasks-per-node=1
 2 | option time=* --time $0
 3 | option mem=* --mem-per-cpu $0
 4 | option mem=0
 5 | option num_threads=* --cpus-per-task $0 --ntasks-per-node=1
 6 | option num_threads=1 --cpus-per-task 1  --ntasks-per-node=1
 7 | default gpu=0
 8 | option gpu=0 -p all,hpc
 9 | option hpc_gpu=* -p hpc --gres=gpu:$0 --time 10-00:00:00
10 | option all_gpu=* -p all,hpc --gres=gpu:$0 --time 10-00:00:00
11 | 


--------------------------------------------------------------------------------
/egs/mwdlp_vcc20/demo_realtime/Makefile:
--------------------------------------------------------------------------------
 1 | # Builds the static library lib/libmwdlp10.a from src/ and links the bin/test_mwdlp demo against it.
 2 | LDIR = lib
 3 | LIBNAME = mwdlp10
 4 | OUT = ${LDIR}/lib${LIBNAME}.a
 5 | 
 6 | CC = gcc
 7 | CFLAGS = -mavx2 -mfma -g -O3 -Wall -W -Wextra -fpic
 8 | LFLAGS = -lm
 9 | # Have the compiler write obj/*.d files so that header edits trigger recompilation.
10 | DEPFLAGS = -MMD -MP
11 | 
12 | ODIR = obj
13 | SDIR = src
14 | IDIR = inc
15 | BDIR = bin
16 | 
17 | INC = -I${IDIR}
18 | LIB = -L${LDIR} -l${LIBNAME}
19 | TARGET = test_mwdlp
20 | 
21 | _OBJS = nnet.o mwdlp10net.o kiss_fft.o freq.o wave.o nnet_data.o
22 | OBJS = $(patsubst %,$(ODIR)/%,$(_OBJS))
23 | 
24 | .PHONY: all clean
25 | 
26 | all: ${BDIR}/${TARGET}
27 | 
28 | # The demo binary is a real file target: it is relinked only when its source or the library changes.
29 | ${BDIR}/${TARGET}: ${SDIR}/${TARGET}.c ${OUT} | ${BDIR}
30 | 	$(CC) $(CFLAGS) $< $(INC) ${LIB} ${LFLAGS} -o $@
31 | 
32 | $(OUT): $(OBJS) | $(LDIR)
33 | 	$(AR) rvs $@ $^
34 | 
35 | # Compile only; the link-time ${LFLAGS} do not belong on a -c command line.
36 | $(ODIR)/%.o: $(SDIR)/%.c | $(ODIR)
37 | 	$(CC) $(CFLAGS) $(DEPFLAGS) $(INC) -c -o $@ $<
38 | 
39 | # Output directories are order-only prerequisites, created on demand instead of at parse time.
40 | $(LDIR) $(ODIR) $(BDIR):
41 | 	mkdir -p $@
42 | 
43 | clean:
44 | 	rm -f $(ODIR)/*.o $(ODIR)/*.d $(OUT) ${BDIR}/${TARGET}
45 | 
46 | -include $(OBJS:.o=.d)
47 | 


--------------------------------------------------------------------------------
/egs/mwdlp_vcc20/demo_realtime/demo_anasyn.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Analysis-synthesis demo: runs ./bin/test_mwdlp on every *.wav in in_dir and
 4 | # writes the result under the same file name in out_dir.
 5 | 
 6 | #in_dir=wav_8kHz
 7 | #in_dir=wav_16kHz
 8 | in_dir=wav_24kHz
 9 | #out_dir=wav_anasyn_8kHz
10 | #out_dir=wav_anasyn_16kHz
11 | out_dir=wav_anasyn_24kHz
12 | 
13 | mkdir -p "$out_dir"
14 | 
15 | # Iterate over the glob directly instead of a scratch list file fed to the
16 | # loop on stdin: no temporary file is left behind on interruption, and the
17 | # child program cannot consume the remaining list entries from stdin.
18 | for line in "${in_dir}"/*.wav;do
19 |     # an unmatched glob is left as the literal pattern; skip it
20 |     [ -e "$line" ] || continue
21 |     name=`basename "$line"`
22 |     echo $line $name
23 |     ./bin/test_mwdlp "$line" "${out_dir}/$name"
24 |     #./bin/test_mwdlp.exe "$line" "${out_dir}/$name"
25 | done
26 | 
27 | #split=(${line// / })
28 | #for spk in ${spks[@]};do
29 | #    spk_idx=$(( ${spk_idx}+1  ))
30 | #count=`expr $count + 1`
31 | #done
32 | 


--------------------------------------------------------------------------------
/egs/mwdlp_vcc20/demo_realtime/demo_melsp.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Mel-spectrogram demo: runs ./bin/test_mwdlp on one input file in three modes (-o, -b, -t).
 3 | # NOTE(review): judging by the file names, -o also dumps the mel-spectrogram (binary + text) while synthesising, and -b / -t synthesise from those dumps -- confirm against the test_mwdlp source.
 4 | in_dir=wav
 5 | out_dir=wav_melsp
 6 | 
 7 | mkdir -p $out_dir
 8 | 
 9 | #ls ${in_dir}/*.wav > tmp.list
10 | #
11 | #while read line;do
12 | #    name=`basename $line .wav`
13 | #    echo $line $name
14 | #    ./bin/test_mwdlp -o melsp.bin melsp.txt $line ${out_dir}/${name}_anasyn.wav
15 | #    ./bin/test_mwdlp -b melsp.bin ${out_dir}/${name}_binsyn.wav
16 | #    ./bin/test_mwdlp -t melsp.txt ${out_dir}/${name}_txtsyn.wav
17 | #    ./bin/test_mwdlp.exe -o melsp.bin melsp.txt $line ${out_dir}/${name}_anasyn.wav
18 | #    ./bin/test_mwdlp.exe -b melsp.bin ${out_dir}/${name}_binsyn.wav
19 | #    ./bin/test_mwdlp.exe -t melsp.txt ${out_dir}/${name}_txtsyn.wav
20 | #done < tmp.list
21 | #
22 | #rm -f tmp.list
23 | 
24 | line=${in_dir}/001_p326.wav
25 | name=`basename $line .wav`
26 | 
27 | ./bin/test_mwdlp -o ${out_dir}/${name}_melsp.bin ${out_dir}/${name}_melsp.txt $line ${out_dir}/${name}_anasyn.wav
28 | ./bin/test_mwdlp -b ${out_dir}/${name}_melsp.bin ${out_dir}/${name}_binsyn.wav
29 | ./bin/test_mwdlp -t ${out_dir}/${name}_melsp.txt ${out_dir}/${name}_txtsyn.wav
30 | #./bin/test_mwdlp.exe -o ${out_dir}/${name}_melsp.bin ${out_dir}/${name}_melsp.txt $line ${out_dir}/${name}_anasyn.wav
31 | #./bin/test_mwdlp.exe -b ${out_dir}/${name}_melsp.bin ${out_dir}/${name}_binsyn.wav
32 | #./bin/test_mwdlp.exe -t ${out_dir}/${name}_melsp.txt ${out_dir}/${name}_txtsyn.wav
33 | 
34 | #split=(${line// / })
35 | #for spk in ${spks[@]};do
36 | #    spk_idx=$(( ${spk_idx}+1  ))
37 | #count=`expr $count + 1`
38 | #done
39 | 


--------------------------------------------------------------------------------
/egs/mwdlp_vcc20/demo_realtime/inc/_kiss_fft_guts.h:
--------------------------------------------------------------------------------
  1 | /*Copyright (c) 2003-2004, Mark Borgerding
  2 | 
  3 |   All rights reserved.
  4 | 
  5 |   Redistribution and use in source and binary forms, with or without
  6 |    modification, are permitted provided that the following conditions are met:
  7 | 
  8 |     * Redistributions of source code must retain the above copyright notice,
  9 |        this list of conditions and the following disclaimer.
 10 |     * Redistributions in binary form must reproduce the above copyright notice,
 11 |        this list of conditions and the following disclaimer in the
 12 |        documentation and/or other materials provided with the distribution.
 13 | 
 14 |   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 15 |   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 16 |   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 17 |   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 18 |   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 19 |   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 20 |   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 21 |   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 22 |   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 23 |   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 24 |   POSSIBILITY OF SUCH DAMAGE.*/
 25 | 
 26 | #ifndef KISS_FFT_GUTS_H
 27 | #define KISS_FFT_GUTS_H
 28 | 
 29 | #define MIN(a,b) ((a)<(b) ? (a):(b))
 30 | #define MAX(a,b) ((a)>(b) ? (a):(b))
 31 | 
 32 | /* kiss_fft.h
 33 |    defines kiss_fft_scalar as either short or a float type
 34 |    and defines
 35 |    typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */
 36 | #include "kiss_fft.h"
 37 | 
 38 | /*
 39 |   Explanation of macros dealing with complex math:
 40 | 
 41 |    C_MUL(m,a,b)         : m = a*b
 42 |    C_FIXDIV( c , div )  : if a fixed point impl., c /= div. noop otherwise
 43 |    C_SUB( res, a,b)     : res = a - b
 44 |    C_SUBFROM( res , a)  : res -= a
 45 |    C_ADDTO( res , a)    : res += a
 46 |  * */
 47 | #ifdef FIXED_POINT
 48 | #include "arch.h"
 49 | 
 50 | 
 51 | #define SAMP_MAX 2147483647
 52 | #define TWID_MAX 32767
 53 | #define TRIG_UPSCALE 1
 54 | 
 55 | #define SAMP_MIN -SAMP_MAX
 56 | 
 57 | 
 58 | #   define S_MUL(a,b) MULT16_32_Q15(b, a)
 59 | 
 60 | #   define C_MUL(m,a,b) \
 61 |       do{ (m).r = SUB32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
 62 |           (m).i = ADD32_ovflw(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0)
 63 | 
 64 | #   define C_MULC(m,a,b) \
 65 |       do{ (m).r = ADD32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
 66 |           (m).i = SUB32_ovflw(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)
 67 | 
 68 | #   define C_MULBYSCALAR( c, s ) \
 69 |       do{ (c).r =  S_MUL( (c).r , s ) ;\
 70 |           (c).i =  S_MUL( (c).i , s ) ; }while(0)
 71 | 
 72 | #   define DIVSCALAR(x,k) \
 73 |         (x) = S_MUL(  x, (TWID_MAX-((k)>>1))/(k)+1 )
 74 | 
 75 | #   define C_FIXDIV(c,div) \
 76 |         do {    DIVSCALAR( (c).r , div);  \
 77 |                 DIVSCALAR( (c).i  , div); }while (0)
 78 | 
 79 | #define  C_ADD( res, a,b)\
 80 |     do {(res).r=ADD32_ovflw((a).r,(b).r);  (res).i=ADD32_ovflw((a).i,(b).i); \
 81 |     }while(0)
 82 | #define  C_SUB( res, a,b)\
 83 |     do {(res).r=SUB32_ovflw((a).r,(b).r);  (res).i=SUB32_ovflw((a).i,(b).i); \
 84 |     }while(0)
 85 | #define C_ADDTO( res , a)\
 86 |     do {(res).r = ADD32_ovflw((res).r, (a).r);  (res).i = ADD32_ovflw((res).i,(a).i);\
 87 |     }while(0)
 88 | 
 89 | #define C_SUBFROM( res , a) /* res -= a; both parts subtract (the real part used ADD32_ovflw before) */ \
 90 |     do {(res).r = SUB32_ovflw((res).r,(a).r);  (res).i = SUB32_ovflw((res).i,(a).i); \
 91 |     }while(0)
 92 | 
 93 | #if defined(OPUS_ARM_INLINE_ASM)
 94 | #include "arm/kiss_fft_armv4.h"
 95 | #endif
 96 | 
 97 | #if defined(OPUS_ARM_INLINE_EDSP)
 98 | #include "arm/kiss_fft_armv5e.h"
 99 | #endif
100 | #if defined(MIPSr1_ASM)
101 | #include "mips/kiss_fft_mipsr1.h"
102 | #endif
103 | 
104 | #else  /* not FIXED_POINT*/
105 | 
106 | #   define S_MUL(a,b) ( (a)*(b) )
107 | #define C_MUL(m,a,b) \
108 |     do{ (m).r = (a).r*(b).r - (a).i*(b).i;\
109 |         (m).i = (a).r*(b).i + (a).i*(b).r; }while(0)
110 | #define C_MULC(m,a,b) \
111 |     do{ (m).r = (a).r*(b).r + (a).i*(b).i;\
112 |         (m).i = (a).i*(b).r - (a).r*(b).i; }while(0)
113 | 
114 | #define C_MUL4(m,a,b) C_MUL(m,a,b)
115 | 
116 | #   define C_FIXDIV(c,div) /* NOOP */
117 | #   define C_MULBYSCALAR( c, s ) \
118 |     do{ (c).r *= (s);\
119 |         (c).i *= (s); }while(0)
120 | #endif
121 | 
122 | #ifndef CHECK_OVERFLOW_OP
123 | #  define CHECK_OVERFLOW_OP(a,op,b) /* noop */
124 | #endif
125 | 
126 | #ifndef C_ADD
127 | #define  C_ADD( res, a,b)\
128 |     do { \
129 |             CHECK_OVERFLOW_OP((a).r,+,(b).r)\
130 |             CHECK_OVERFLOW_OP((a).i,+,(b).i)\
131 |             (res).r=(a).r+(b).r;  (res).i=(a).i+(b).i; \
132 |     }while(0)
133 | #define  C_SUB( res, a,b)\
134 |     do { \
135 |             CHECK_OVERFLOW_OP((a).r,-,(b).r)\
136 |             CHECK_OVERFLOW_OP((a).i,-,(b).i)\
137 |             (res).r=(a).r-(b).r;  (res).i=(a).i-(b).i; \
138 |     }while(0)
139 | #define C_ADDTO( res , a)\
140 |     do { \
141 |             CHECK_OVERFLOW_OP((res).r,+,(a).r)\
142 |             CHECK_OVERFLOW_OP((res).i,+,(a).i)\
143 |             (res).r += (a).r;  (res).i += (a).i;\
144 |     }while(0)
145 | 
146 | #define C_SUBFROM( res , a)\
147 |     do {\
148 |             CHECK_OVERFLOW_OP((res).r,-,(a).r)\
149 |             CHECK_OVERFLOW_OP((res).i,-,(a).i)\
150 |             (res).r -= (a).r;  (res).i -= (a).i; \
151 |     }while(0)
152 | #endif /* C_ADD defined */
153 | 
154 | #ifdef FIXED_POINT /* twiddles are cos/sin scaled by TWID_MAX and rounded with floor(.5 + ...) */
155 | /*#  define KISS_FFT_COS(phase)  TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * cos (phase))))
156 | #  define KISS_FFT_SIN(phase)  TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * sin (phase))))*/
157 | #  define KISS_FFT_COS(phase)  floor(.5+TWID_MAX*cos (phase))
158 | #  define KISS_FFT_SIN(phase)  floor(.5+TWID_MAX*sin (phase))
159 | #  define HALF_OF(x) ((x)>>1)
160 | #elif defined(USE_SIMD) /* the scalar cos/sin result is passed through _mm_set1_ps */
161 | #  define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) )
162 | #  define KISS_FFT_SIN(phase) _mm_set1_ps( sin(phase) )
163 | #  define HALF_OF(x) ((x)*_mm_set1_ps(.5f))
164 | #else /* plain floating point: cos/sin cast to kiss_fft_scalar */
165 | #  define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase)
166 | #  define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase)
167 | #  define HALF_OF(x) ((x)*.5f)
168 | #endif
169 | /* kf_cexp: writes KISS_FFT_COS(phase) to x->r and KISS_FFT_SIN(phase) to x->i, using whichever variant was selected above. */
170 | #define  kf_cexp(x,phase) \
171 |         do{ \
172 |                 (x)->r = KISS_FFT_COS(phase);\
173 |                 (x)->i = KISS_FFT_SIN(phase);\
174 |         }while(0)
175 | /* kf_cexp2: same shape, built on celt_cos_norm(); the imaginary part is celt_cos_norm evaluated at phase-32768. NOTE(review): TRIG_UPSCALE and celt_cos_norm are not defined in the visible part of this header -- confirm they are available before using this macro. */
176 | #define  kf_cexp2(x,phase) \
177 |    do{ \
178 |       (x)->r = TRIG_UPSCALE*celt_cos_norm((phase));\
179 |       (x)->i = TRIG_UPSCALE*celt_cos_norm((phase)-32768);\
180 | }while(0)
181 | 
182 | #endif /* KISS_FFT_GUTS_H */
183 | 


--------------------------------------------------------------------------------
/egs/mwdlp_vcc20/demo_realtime/inc/common.h:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #ifndef COMMON_H
 4 | #define COMMON_H
 5 | 
 6 | #include <stdlib.h>
 7 | #include <string.h>
 8 | #include <math.h>
 9 | 
10 | #define RNN_INLINE inline
11 | #define OPUS_INLINE inline
12 | 
13 | #define LOG256 5.5451774445f
14 | /* Fast approximation of log2(x) for positive, normal floats: the IEEE-754 exponent field
15 |    gives the integer part, a cubic polynomial in (mantissa - 1.5) gives the fractional part. */
16 | static RNN_INLINE float log2_approx(float x)
17 | {
18 |    int integer;
19 |    float frac;
20 |    union { float f; int i; } in;   /* reinterpret the float's bit pattern as an int */
21 |    in.f = x;
22 |    integer = (in.i>>23)-127;       /* unbiased binary exponent */
23 |    /* Reset the exponent so that in.f falls in [1,2). Multiply instead of shifting:
24 |       integer is negative whenever x < 1, and left-shifting a negative int is undefined in C. */
25 |    in.i -= integer*(1<<23);
26 |    frac = in.f - 1.5f;
27 |    frac = -0.41445418f + frac*(0.95909232f + frac*(-0.33951290f + frac*0.16541097f));
28 |    return 1+integer+frac;
29 | }
30 | 
31 | #define log_approx(x) (0.69315f*log2_approx(x))
32 | 
33 | 
34 | /** Copy n elements from src to dst. The 0* term provides compile-time type checking  */
35 | #ifndef OVERRIDE_RNN_COPY
36 | #define RNN_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
37 | #endif
38 | 
39 | /** Copy n elements from src to dst, allowing overlapping regions. The 0* term
40 |     provides compile-time type checking */
41 | #ifndef OVERRIDE_RNN_MOVE
42 | #define RNN_MOVE(dst, src, n) (memmove((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
43 | #endif
44 | 
45 | /** Set n elements of dst to zero */
46 | #ifndef OVERRIDE_RNN_CLEAR
47 | #define RNN_CLEAR(dst, n) (memset((dst), 0, (n)*sizeof(*(dst))))
48 | #endif
49 | 
50 | 
51 | 
52 | #endif
53 | 


--------------------------------------------------------------------------------
/egs/mwdlp_vcc20/demo_realtime/inc/freq.h:
--------------------------------------------------------------------------------
  1 | /* Copyright (c) 2017-2018 Mozilla */
  2 | /*
  3 |    Redistribution and use in source and binary forms, with or without
  4 |    modification, are permitted provided that the following conditions
  5 |    are met:
  6 | 
  7 |    - Redistributions of source code must retain the above copyright
  8 |    notice, this list of conditions and the following disclaimer.
  9 | 
 10 |    - Redistributions in binary form must reproduce the above copyright
 11 |    notice, this list of conditions and the following disclaimer in the
 12 |    documentation and/or other materials provided with the distribution.
 13 | 
 14 |    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 15 |    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 16 |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 17 |    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
 18 |    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 19 |    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 20 |    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 21 |    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 22 |    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 24 |    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 | */
 26 | /* Modified by Patrick Lumban Tobing (Nagoya University) on Dec. 2020 - Aug. 2021,
 27 |    marked by PLT_<MonthYear> */
 28 | #ifndef FREQ_H /* include guard: a second inclusion would otherwise redefine the DSPState typedef */
 29 | #define FREQ_H
 30 | #include "freq_conf.h"
 31 | #include "kiss_fft.h"
 32 | 
 33 | //PLT_Dec20
 34 | /*
 35 |     Define these in freq_conf.h (MEL_DIM, used by MELSP_MAGSP_DIM below, must also be visible here)
 36 |     SAMPLING_FREQUENCY 16000 //fs
 37 |     FRAME_SHIFT 80 //int((fs/1000)*shiftms); shiftms = 5 ms
 38 |     WINDOW_LENGTH 440 //int((fs/1000)*winms); winms = 27.5 ms
 39 |     FFT_LENGTH 1024 //fs=8kHz-16kHz: 1024; 22.05kHz-24kHz: 2048; 44.1kHz-48kHz: 4096
 40 |     HPASS_FILT_TAPS 1023 //order+1, has to be odd because high-pass filter passes nyq. freq.
 41 | */
 42 | 
 43 | #define WINDOW_LENGTH_1 (WINDOW_LENGTH - 1)
 44 | #define WINDOW_LENGTH_2 (WINDOW_LENGTH_1 - 1) //for indexing right side window buffer
 45 | 
 46 | #define HALF_WINDOW_LENGTH_1 (WINDOW_LENGTH_1 / 2) //does not include 1st [0] and (1+((N-1)/2)+((N-1)%2))th [1] if (N-1)%2 == 1
 47 | 
 48 | #define WIN_PAD (FFT_LENGTH - WINDOW_LENGTH) //window is centered on total FFT length
 49 | 
 50 | #define WIN_PAD_LEFT (WIN_PAD / 2)
 51 | #define WIN_PAD_RIGHT (WIN_PAD_LEFT + (WIN_PAD % 2)) //right pad is one sample longer than the left pad if total pad is odd
 52 | 
 53 | #define HALF_FFT_LENGTH (FFT_LENGTH / 2)
 54 | 
 55 | #define LEFT_SAMPLES (HALF_FFT_LENGTH - WIN_PAD_LEFT) //samples at left-side window / reflected samples at the left edge
 56 | #define RIGHT_SAMPLES (HALF_FFT_LENGTH - WIN_PAD_RIGHT) //samples at right-side window / reflected samples at the right edge
 57 | 
 58 | #define LEFT_SAMPLES_1 (LEFT_SAMPLES - 1) //for indexing first frame samples
 59 | #define LEFT_SAMPLES_2 (LEFT_SAMPLES_1 - 1) //for indexing first frame reflected samples
 60 | #define RIGHT_SAMPLES_1 (RIGHT_SAMPLES - 1) //for indexing first frame samples
 61 | 
 62 | #define WIN_LEFT_IDX (WIN_PAD_LEFT + 1) //0->439, index of centered 1st in total FFT-length, exclude first sample (+1) [0 coefficient]
 63 | #define WIN_RIGHT_IDX (WIN_LEFT_IDX - 1 + WINDOW_LENGTH - 1) //0->439, index of centered 440th in total FFT-length
 64 | 
 65 | #define BUFFER_LENGTH (WINDOW_LENGTH_1 - FRAME_SHIFT) //store samples for proceeding frame
 66 | 
 67 | #define HPASS_FILT_TAPS_1 (HPASS_FILT_TAPS - 1)
 68 | 
 69 | #define MAGSP_DIM (HALF_FFT_LENGTH + 1)
 70 | #define MELSP_MAGSP_DIM (MAGSP_DIM * MEL_DIM)
 71 | 
 72 | /* Buffers passed to the functions declared below; going by the field names: filter taps, half window, sample histories, FFT input/output, magnitude spectrum, mel filterbank. */
 73 | //PLT_Dec20
 74 | typedef struct {
 75 |     kiss_fft_state *kfft;
 76 |     float hpass_filt[HPASS_FILT_TAPS];
 77 |     float half_window[HALF_WINDOW_LENGTH_1];
 78 |     float samples_hpass[HPASS_FILT_TAPS];
 79 |     float samples_win[WINDOW_LENGTH_1]; //exclude first sample because of coefficient 0
 80 |     kiss_fft_cpx in_fft[FFT_LENGTH]; //initialized with zeros, fill in only centered window_length
 81 |     kiss_fft_cpx out_fft[FFT_LENGTH];
 82 |     float magsp[MAGSP_DIM];
 83 |     float melfb[MELSP_MAGSP_DIM];
 84 | } DSPState;
 85 | 
 86 | int dspstate_get_size();
 87 | 
 88 | DSPState *dspstate_create();
 89 | 
 90 | void dspstate_destroy(DSPState *dsp);
 91 | 
 92 | void shift_apply_hpassfilt(DSPState *dsp, float *x);
 93 | 
 94 | void apply_window(DSPState *dsp);
 95 | 
 96 | void shift_apply_window(DSPState *dsp, const float *x);
 97 | 
 98 | void mel_spec_extract(DSPState *dsp, float *melsp);
 99 | 
100 | //PLT_Aug21
101 | void mel_spec_warp_extract(DSPState *dsp, float *melsp, float pitchShift);
102 | #endif /* FREQ_H */


--------------------------------------------------------------------------------
/egs/mwdlp_vcc20/demo_realtime/inc/mwdlp10net.h:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2018 Mozilla */
 2 | /*
 3 |    Redistribution and use in source and binary forms, with or without
 4 |    modification, are permitted provided that the following conditions
 5 |    are met:
 6 | 
 7 |    - Redistributions of source code must retain the above copyright
 8 |    notice, this list of conditions and the following disclaimer.
 9 | 
10 |    - Redistributions in binary form must reproduce the above copyright
11 |    notice, this list of conditions and the following disclaimer in the
12 |    documentation and/or other materials provided with the distribution.
13 | 
14 |    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 |    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 |    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
18 |    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 |    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 |    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 |    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
22 |    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 |    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 |    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | */
26 | /* Modified by Patrick Lumban Tobing (Nagoya University) on Dec. 2020 - Jul. 2021,
27 |    marked by PLT_<MonthYear> */
28 | 
29 | //PLT_Dec20
30 | #ifndef _MWDLP10NET_H_
31 | #define _MWDLP10NET_H_
32 | 
33 | #ifndef MWDLP10NET_EXPORT
34 | # if defined(WIN32)
35 | #  if defined(MWDLP10NET_BUILD) && defined(DLL_EXPORT)
36 | #   define MWDLP10NET_EXPORT __declspec(dllexport)
37 | #  else
38 | #   define MWDLP10NET_EXPORT
39 | #  endif
40 | # elif defined(__GNUC__) && defined(MWDLP10NET_BUILD)
41 | #  define MWDLP10NET_EXPORT __attribute__ ((visibility ("default")))
42 | # else
43 | #  define MWDLP10NET_EXPORT
44 | # endif
45 | #endif
46 | 
47 | 
48 | //PLT_Dec20
49 | typedef struct MWDLP10NetState MWDLP10NetState; //opaque handle; the struct body is in mwdlp10net_private.h
50 | 
51 | MWDLP10NET_EXPORT int mwdlp10net_get_size(); //presumably the size of MWDLP10NetState in bytes -- confirm in mwdlp10net.c
52 | 
53 | MWDLP10NET_EXPORT MWDLP10NetState *mwdlp10net_create(); //NOTE(review): expected to be paired with mwdlp10net_destroy(); allocation details are not visible from this header
54 | 
55 | MWDLP10NET_EXPORT void mwdlp10net_destroy(MWDLP10NetState *mwdlp10net);
56 | 
57 | //PLT_Jul21 -- features is read-only input; output and n_output presumably receive the synthesized samples and their count (confirm in mwdlp10net.c)
58 | MWDLP10NET_EXPORT void mwdlp10net_synthesize(MWDLP10NetState *st, const float *features,
59 |     short *output, int *n_output, int flag_last_frame);
60 |     //short *output, int *n_output, int flag_last_frame, float *out_ddlpc_coarse, float *out_ddlpc_fine, short *pcm_band);
61 | 
62 | //PLT_Jul21 -- same parameter list as mwdlp10net_synthesize; by its name, the variant for models without data-driven LPC (confirm)
63 | MWDLP10NET_EXPORT void mwdlp10net_synthesize_nodlpc(MWDLP10NetState *st, const float *features,
64 |     short *output, int *n_output, int flag_last_frame);
65 |     //short *output, int *n_output, int flag_last_frame, short *pcm_band);
66 | 
67 | #endif
68 | 


--------------------------------------------------------------------------------
/egs/mwdlp_vcc20/demo_realtime/inc/mwdlp10net_private.h:
--------------------------------------------------------------------------------
  1 | #ifndef MWDLP10NET_PRIVATE_H
  2 | #define MWDLP10NET_PRIVATE_H
  3 | 
  4 | #include "mwdlp10net.h"
  5 | #include "nnet.h"
  6 | #include "nnet_data.h"
  7 | 
  8 | //PLT_Dec20
  9 | /*
 10 |     Define the following in nnet_data.h:
 11 |     RNN_MAIN_NEURONS, RNN_SUB_NEURONS,
 12 |     MID_OUT, N_QUANTIZE, SQRT_QUANTIZE,
 13 |     FEATURE_CONV_OUT_SIZE, N_MBANDS,
 14 |     DLPC_ORDER, PQMF_ORDER,
 15 |     N_SAMPLE_BANDS, FEATURES_DIM,
 16 |     CONV_KERNEL_1, FEATURE_CONV_STATE_SIZE, FEATURES_DIM, FEATURE_CONV_DELAY
 17 | */
 18 | #define RNN_MAIN_NEURONS_2 (RNN_MAIN_NEURONS * 2) //every derived size below is parenthesized so it expands safely inside larger expressions
 19 | #define RNN_SUB_NEURONS_2 (RNN_SUB_NEURONS * 2)
 20 | 
 21 | #define RNN_MAIN_NEURONS_3 (RNN_MAIN_NEURONS * 3)
 22 | #define RNN_SUB_NEURONS_3 (RNN_SUB_NEURONS * 3)
 23 | 
 24 | #define RNN_MAIN_NEURONS_3_SQRT_QUANTIZE (RNN_MAIN_NEURONS_3 * SQRT_QUANTIZE)
 25 | #define RNN_SUB_NEURONS_3_SQRT_QUANTIZE (RNN_SUB_NEURONS_3 * SQRT_QUANTIZE)
 26 | 
 27 | #define NO_DLPC (DLPC_ORDER == 0)
 28 | #define NO_DLPC_MBANDS (NO_DLPC * N_MBANDS)
 29 | 
 30 | #define LPC_ORDER_MBANDS (DLPC_ORDER * N_MBANDS)
 31 | #define LPC_ORDER_MBANDS_2 (LPC_ORDER_MBANDS * 2)
 32 | #define LPC_ORDER_MBANDS_4 (LPC_ORDER_MBANDS_2 * 2)
 33 | #define LPC_ORDER_1_MBANDS ((DLPC_ORDER - 1) * N_MBANDS)
 34 | 
 35 | #define MID_OUT_MBANDS (MID_OUT * N_MBANDS)
 36 | #define MID_OUT_MBANDS_2 (MID_OUT_MBANDS * 2)
 37 | 
 38 | #define LPC_ORDER_MBANDS_3 (LPC_ORDER_MBANDS * 3)
 39 | #define LPC_ORDER_MBANDS_2_MID_OUT_MBANDS (LPC_ORDER_MBANDS_2 + MID_OUT_MBANDS)
 40 | #define LPC_ORDER_MBANDS_3_MID_OUT_MBANDS (LPC_ORDER_MBANDS_3 + MID_OUT_MBANDS)
 41 | #define LPC_ORDER_MBANDS_4_MID_OUT_MBANDS (LPC_ORDER_MBANDS_4 + MID_OUT_MBANDS)
 42 | 
 43 | #define FEATURE_CONV_STATE_SIZE_1 (FEATURE_CONV_STATE_SIZE - FEATURES_DIM)
 44 | 
 45 | /*
 46 | PQMF_DELAY is actually the number of samples on each of the left/right side of the current sample
 47 | for the kaiser window, i.e., half of the value of PQMF_ORDER (even number).
 48 | */
 49 | #define PQMF_DELAY (PQMF_ORDER / 2)
 50 | #define PQMF_ORDER_MBANDS (PQMF_ORDER * N_MBANDS)
 51 | #define N_MBANDS_SQR (N_MBANDS * N_MBANDS)
 52 | 
 53 | /*
 54 | A bit confusing, but PQMF_ORDER is the number of taps for the kaiser window in pqmf.py.
 55 | It has to be an even number because it covers the left and right sides of the current sample t;
 56 | the number of points in the kaiser window is 1+PQMF_ORDER, i.e., current_sample+(left+right).
 57 | */
 58 | #define TAPS (PQMF_ORDER + 1)
 59 | #define TAPS_MBANDS (TAPS * N_MBANDS)
 60 | 
 61 | /*
 62 | DLPC_ORDER is the number of coefficients for data-driven LPC,
 63 | i.e., the number of previous samples considered in the LP computation.
 64 | */
 65 | #define MDENSE_OUT_DUALFC (DLPC_ORDER * 2 + MID_OUT)
 66 | #define MDENSE_OUT_DUALFC_MBANDS (MDENSE_OUT_DUALFC * N_MBANDS)
 67 | #define MDENSE_OUT_DUALFC_2_MBANDS (MDENSE_OUT_DUALFC_MBANDS * 2)
 68 | #define MDENSE_OUT_FC (DLPC_ORDER * 2 + SQRT_QUANTIZE)
 69 | #define MDENSE_OUT_FC_MBANDS (MDENSE_OUT_FC * N_MBANDS)
 70 | #define SQRT_QUANTIZE_MBANDS (SQRT_QUANTIZE * N_MBANDS)
 71 | 
 72 | #define INIT_LAST_SAMPLE (SQRT_QUANTIZE / 2)
 73 | 
 74 | /*
 75 | MAX_N_OUTPUT either from FIRST n-outputs [due to remainder of (PQMF_DELAY+1) % N-BANDS
 76 |     because first samples are supposed to be PQMF_DELAY+1, but if the N-BANDS are not divisible
 77 |     by PQMF_DELAY, the remainder samples are actually the very first samples because the multiband synthesis
 78 |     is done in a multiple of N-BANDS, where 1 contribution to PQMF_DELAY+1 is automatically added
 79 |     after each synthesis]
 80 |     FIRST_N_OUTPUT = (((PQMF_DELAY / NBANDS) + (PQMF_DELAY % NBANDS)) * NBANDS) % PQMF_DELAY
 81 |     We want to find what is the minimum number of samples to reach the PQMF_DELAY,
 82 |     in a multiple of NBANDS, then take the remainder with respect to PQMF_DELAY
 83 |     as the very first output if exists.
 84 | LAST n-outputs [due to frame- and pqmf-delays, w/ right-side replicate- and zero-padding, respectively]
 85 | */
 86 | //PQMF_DELAY is parenthesized at its definition, so it can be used directly as the modulus
 87 | #define FIRST_N_OUTPUT ((((PQMF_DELAY / N_MBANDS) + (PQMF_DELAY % N_MBANDS)) * N_MBANDS) % PQMF_DELAY)
 88 | #define MAX_N_OUTPUT IMAX((FIRST_N_OUTPUT + 1) * N_SAMPLE_BANDS * N_MBANDS, \
 89 |                     N_SAMPLE_BANDS * FEATURE_CONV_DELAY * N_MBANDS + PQMF_DELAY)
 90 | 
 91 | #define FIRST_N_OUTPUT_MBANDS (FIRST_N_OUTPUT * N_MBANDS)
 92 | #define PQMF_DELAY_MBANDS (PQMF_DELAY * N_MBANDS)
 93 | 
 94 | 
 95 | //PLT_Sep21 -- body of the MWDLP10NetState handle that mwdlp10net.h forward-declares
 96 | struct MWDLP10NetState {
 97 |     MWDLP10NNetState nnet; //NOTE(review): MWDLP10NNetState is not declared in this header; presumably it comes from nnet_data.h -- confirm
 98 |     float mu_law_10_table[N_QUANTIZE]; //one entry per quantization level; presumably a mu-law decoding lookup (confirm in mwdlp10net.c)
 99 |     short last_coarse[LPC_ORDER_MBANDS+NO_DLPC_MBANDS]; //DLPC_ORDER*N_MBANDS entries, or N_MBANDS entries when DLPC_ORDER == 0
100 |     short last_fine[LPC_ORDER_MBANDS+NO_DLPC_MBANDS]; //same sizing as last_coarse
101 |     int frame_count; //NOTE(review): the semantics of the counters, flag and de-emphasis memory below are not visible from this header
102 |     int sample_count;
103 |     int first_flag;
104 |     float deemph_mem;
105 |     //upsample-bands,zero-pad-right,NBxNB
106 |     float buffer_output[N_MBANDS_SQR];
107 |     /*
108 |         in_state pqmf_synth filt.,(ORD+1)*NB+(NB-1)*NB=ORD*NB+NB*NB
109 |         for the very first output, zeros to the left of the very first [{ORD-1}-th] as:
110 |         [[0,...,0]_1st,[0,...,0]_2nd,...,[[(1st,...,NB-th)*NB]_1st,[0,...0]_2nd,...,[0,...,0]_NB-th]]_{ORD+1}]
111 |         for NB-bands and kaiser_length=ORD+1, where at each time-index, the dimension is NB*NB
112 |         nonzeros for (1st*NB) and zeros for the (2nd-to-NB)*NB
113 |         it then shifts to the left for every new output
114 |     */
115 |     float pqmf_state[PQMF_ORDER_MBANDS+N_MBANDS_SQR];
116 |     //first in_state pqmf_synth filt.,(ORD+1)*NB+(FIRST_N_OUTPUT-1)*NB=ORD*NB+FIRST_N_OUTPUT*NB
117 |     float first_pqmf_state[PQMF_ORDER_MBANDS+FIRST_N_OUTPUT_MBANDS];
118 |     //last in_state pqmf_synth filt.,(ORD+1)*NB+(ORD//2-1)*NB=ORD*NB+DELAY*NB
119 |     float last_pqmf_state[PQMF_ORDER_MBANDS+PQMF_DELAY_MBANDS];
120 | #if defined(WINDOWS_SYS) || defined (GNU_EXT) /* same condition under which nnet.h defines RNGState */
121 |     RNGState rng_state;
122 | #endif
123 | };
124 | 
125 | 
126 | #endif
127 | 


--------------------------------------------------------------------------------
/egs/mwdlp_vcc20/demo_realtime/inc/nnet.h:
--------------------------------------------------------------------------------
  1 | /* Copyright (c) 2018 Mozilla
  2 |    Copyright (c) 2017 Jean-Marc Valin */
  3 | /*
  4 |    Redistribution and use in source and binary forms, with or without
  5 |    modification, are permitted provided that the following conditions
  6 |    are met:
  7 | 
  8 |    - Redistributions of source code must retain the above copyright
  9 |    notice, this list of conditions and the following disclaimer.
 10 | 
 11 |    - Redistributions in binary form must reproduce the above copyright
 12 |    notice, this list of conditions and the following disclaimer in the
 13 |    documentation and/or other materials provided with the distribution.
 14 | 
 15 |    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 16 |    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 17 |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 18 |    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
 19 |    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 20 |    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 21 |    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 22 |    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 23 |    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 24 |    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 25 |    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 26 | */
 27 | /* Modified by Patrick Lumban Tobing (Nagoya University) on Sep. 2020 - Sep. 2021,
 28 |    marked by PLT_<MonthYear> */
 29 | 
 30 | #ifndef _NNET_H_
 31 | #define _NNET_H_
 32 | 
 33 | #include "arch.h"
 34 | 
 35 | #define ACTIVATION_LINEAR  0
 36 | #define ACTIVATION_SIGMOID 1
 37 | #define ACTIVATION_TANH    2
 38 | #define ACTIVATION_RELU    3
 39 | #define ACTIVATION_EXP 4 //PLT_Sep20
 40 | #define ACTIVATION_TANHSHRINK 5 //PLT_Sep20
 41 | #define ACTIVATION_TANH_EXP 6 //PLT_Sep20
 42 | 
 43 | typedef struct {
 44 |   const float *bias;
 45 |   const float *input_weights;
 46 |   int nb_inputs;
 47 |   int nb_neurons;
 48 |   int activation;
 49 | } DenseLayer;
 50 | 
 51 | //PLT_Dec20
 52 | typedef struct {
 53 |   const float *mean;
 54 |   const float *std;
 55 |   int n_dim;
 56 | } NormStats;
 57 | 
 58 | //PLT_May21
 59 | typedef struct {
 60 |   const float *bias;
 61 |   const float *input_weights;
 62 |   const float *factors;
 63 |   int activation;
 64 |   int activation_signs;
 65 |   int activation_mags;
 66 |   int activation_logits;
 67 | } MDenseLayerMWDLP10;
 68 | 
 69 | typedef struct {
 70 |   const float *bias;
 71 |   const float *input_weights;
 72 |   const float *recurrent_weights;
 73 |   int nb_inputs;
 74 |   int nb_neurons;
 75 |   int activation;
 76 |   int reset_after;
 77 | } GRULayer;
 78 | 
 79 | typedef struct {
 80 |   const float *bias;
 81 |   const float *diag_weights;
 82 |   const float *recurrent_weights;
 83 |   const int *idx;
 84 |   int nb_neurons;
 85 |   int activation;
 86 |   int reset_after;
 87 | } SparseGRULayer;
 88 | 
 89 | typedef struct {
 90 |   const float *bias;
 91 |   const float *input_weights;
 92 |   int nb_inputs;
 93 |   int kernel_size;
 94 |   int nb_neurons;
 95 |   int activation;
 96 | } Conv1DLayer;
 97 | 
 98 | typedef struct {
 99 |   const float *embedding_weights;
100 |   int nb_inputs;
101 |   int dim;
102 | } EmbeddingLayer;
103 | 
104 | //PLT_Sep21 -- random-generator state; the type only exists when WINDOWS_SYS or GNU_EXT is defined
105 | #if defined(WINDOWS_SYS) || defined (GNU_EXT)
106 |     typedef struct {
107 | #ifdef WINDOWS_SYS
108 |         BCRYPT_ALG_HANDLE rng_prov; //NOTE(review): BCRYPT_ALG_HANDLE is not declared here; presumably arch.h or the including file pulls in the Windows bcrypt header -- confirm
109 | #else
110 |     #ifdef GNU_EXT
111 |        unsigned short int xsubi[3]; //NOTE(review): looks like the seed triple for the reentrant *rand48_r functions -- confirm against nnet.c
112 |        struct drand48_data drand_buffer[1]; //NOTE(review): struct drand48_data is not declared in this header; presumably <stdlib.h> with GNU extensions is included beforehand -- confirm
113 |     #endif
114 | #endif
115 |     } RNGState;
116 | #endif
117 | 
118 | //PLT_Aug21
119 | void sgemv_accum16_(float *out, const float *weights, int rows, int cols, const float *x);
120 | void sgemv_accum(float *out, const float *weights, int rows, int cols, const float *x);
121 | 
122 | void compute_activation(float *output, const float *input, int N, int activation);
123 | 
124 | void compute_dense(const DenseLayer *layer, float *output, const float *input);
125 | 
126 | //PLT_Dec20
127 | void compute_dense_linear(const DenseLayer *layer, float *output, const float *input);
128 | 
129 | //PLT_Mar21
130 | void compute_mdense_mwdlp10(const MDenseLayerMWDLP10 *layer, const DenseLayer *fc_layer, const float *prev_logits,
131 |     float *output, const float *input, const short *last_output);
132 |     //float *output, const float *input, const int *last_output, float* ddlpc);
133 | 
134 | //PLT_Mar21
135 | void compute_mdense_mwdlp10_nodlpc(const MDenseLayerMWDLP10 *layer, const DenseLayer *fc_layer, float *output,
136 |     const float *input);
137 | 
138 | void compute_gru3(const GRULayer *gru, float *state, const float *input);
139 | 
140 | void compute_sparse_gru(const SparseGRULayer *gru, float *state, const float *input);
141 | 
142 | //PLT_Jun21
143 | void compute_conv1d_linear_frame_in(const Conv1DLayer *layer, float *output, float *mem, const float *input);
144 | 
145 | //PLT_Sep21
146 | #if defined(WINDOWS_SYS) || defined (GNU_EXT)
147 |     int sample_from_pdf_mwdlp(const float *pdf, int N, RNGState *rng_state);
148 | #else
149 |     int sample_from_pdf_mwdlp(const float *pdf, int N);
150 | #endif
151 | 
152 | //PLT_Dec20
153 | void compute_normalize(const NormStats *norm_stats, float *input_output);
154 | 
155 | #endif /* _NNET_H_ */
156 | 


--------------------------------------------------------------------------------
/egs/mwdlp_vcc20/demo_realtime/inc/opus_types.h:
--------------------------------------------------------------------------------
  1 | /* (C) COPYRIGHT 1994-2002 Xiph.Org Foundation */
  2 | /* Modified by Jean-Marc Valin */
  3 | /*
  4 |    Redistribution and use in source and binary forms, with or without
  5 |    modification, are permitted provided that the following conditions
  6 |    are met:
  7 | 
  8 |    - Redistributions of source code must retain the above copyright
  9 |    notice, this list of conditions and the following disclaimer.
 10 | 
 11 |    - Redistributions in binary form must reproduce the above copyright
 12 |    notice, this list of conditions and the following disclaimer in the
 13 |    documentation and/or other materials provided with the distribution.
 14 | 
 15 |    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 16 |    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 17 |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 18 |    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 19 |    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 20 |    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 21 |    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 22 |    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 23 |    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 24 |    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 25 |    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 26 | */
 27 | /* opus_types.h based on ogg_types.h from libogg */
 28 | 
 29 | /**
 30 |    @file opus_types.h
 31 |    @brief Opus reference implementation types
 32 | */
 33 | #ifndef OPUS_TYPES_H
 34 | #define OPUS_TYPES_H
 35 | 
 36 | /* Use the real stdint.h if it's there (taken from Paul Hsieh's pstdint.h) */
 37 | #if (defined(__STDC__) && __STDC__ && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_)) || defined (HAVE_STDINT_H))
 38 | #include <stdint.h>
 39 | 
 40 |    typedef int16_t opus_int16;
 41 |    typedef uint16_t opus_uint16;
 42 |    typedef int32_t opus_int32;
 43 |    typedef uint32_t opus_uint32;
 44 | #elif defined(_WIN32)
 45 | 
 46 | #  if defined(__CYGWIN__)
 47 | #    include <_G_config.h>
 48 |      typedef _G_int32_t opus_int32;
 49 |      typedef _G_uint32_t opus_uint32;
 50 |      typedef _G_int16 opus_int16;
 51 |      typedef _G_uint16 opus_uint16;
 52 | #  elif defined(__MINGW32__)
 53 |      typedef short opus_int16;
 54 |      typedef unsigned short opus_uint16;
 55 |      typedef int opus_int32;
 56 |      typedef unsigned int opus_uint32;
 57 | #  elif defined(__MWERKS__)
 58 |      typedef int opus_int32;
 59 |      typedef unsigned int opus_uint32;
 60 |      typedef short opus_int16;
 61 |      typedef unsigned short opus_uint16;
 62 | #  else
 63 |      /* MSVC/Borland */
 64 |      typedef __int32 opus_int32;
 65 |      typedef unsigned __int32 opus_uint32;
 66 |      typedef __int16 opus_int16;
 67 |      typedef unsigned __int16 opus_uint16;
 68 | #  endif
 69 | 
 70 | #elif defined(__MACOS__)
 71 | 
 72 | #  include <sys/types.h>
 73 |    typedef SInt16 opus_int16;
 74 |    typedef UInt16 opus_uint16;
 75 |    typedef SInt32 opus_int32;
 76 |    typedef UInt32 opus_uint32;
 77 | 
 78 | #elif (defined(__APPLE__) && defined(__MACH__)) /* MacOS X Framework build */
 79 | 
 80 | #  include <sys/types.h>
 81 |    typedef int16_t opus_int16;
 82 |    typedef u_int16_t opus_uint16;
 83 |    typedef int32_t opus_int32;
 84 |    typedef u_int32_t opus_uint32;
 85 | 
 86 | #elif defined(__BEOS__)
 87 | 
 88 |    /* Be */
 89 | #  include <inttypes.h>
 90 |    typedef int16 opus_int16;
 91 |    typedef u_int16 opus_uint16;
 92 |    typedef int32_t opus_int32;
 93 |    typedef u_int32_t opus_uint32;
 94 | 
 95 | #elif defined (__EMX__)
 96 | 
 97 |    /* OS/2 GCC */
 98 |    typedef short opus_int16;
 99 |    typedef unsigned short opus_uint16;
100 |    typedef int opus_int32;
101 |    typedef unsigned int opus_uint32;
102 | 
103 | #elif defined (DJGPP)
104 | 
105 |    /* DJGPP */
106 |    typedef short opus_int16;
107 |    typedef unsigned short opus_uint16;
108 |    typedef int opus_int32;
109 |    typedef unsigned int opus_uint32;
110 | 
111 | #elif defined(R5900)
112 | 
113 |    /* PS2 EE */
114 |    typedef int opus_int32;
115 |    typedef unsigned opus_uint32;
116 |    typedef short opus_int16;
117 |    typedef unsigned short opus_uint16;
118 | 
119 | #elif defined(__SYMBIAN32__)
120 | 
121 |    /* Symbian GCC */
122 |    typedef signed short opus_int16;
123 |    typedef unsigned short opus_uint16;
124 |    typedef signed int opus_int32;
125 |    typedef unsigned int opus_uint32;
126 | 
127 | #elif defined(CONFIG_TI_C54X) || defined (CONFIG_TI_C55X)
128 | 
129 |    typedef short opus_int16;
130 |    typedef unsigned short opus_uint16;
131 |    typedef long opus_int32;
132 |    typedef unsigned long opus_uint32;
133 | 
134 | #elif defined(CONFIG_TI_C6X)
135 | 
136 |    typedef short opus_int16;
137 |    typedef unsigned short opus_uint16;
138 |    typedef int opus_int32;
139 |    typedef unsigned int opus_uint32;
140 | 
141 | #else
142 | 
143 |    /* Give up, take a reasonable guess */
144 |    typedef short opus_int16;
145 |    typedef unsigned short opus_uint16;
146 |    typedef int opus_int32;
147 |    typedef unsigned int opus_uint32;
148 | 
149 | #endif
150 | 
151 | #define opus_int         int                     /* used for counters etc; at least 16 bits */
152 | #define opus_int64       long long
153 | #define opus_int8        signed char
154 | 
155 | #define opus_uint        unsigned int            /* used for counters etc; at least 16 bits */
156 | #define opus_uint64      unsigned long long
157 | #define opus_uint8       unsigned char
158 | 
159 | #endif  /* OPUS_TYPES_H */
160 | 


--------------------------------------------------------------------------------
/egs/mwdlp_vcc20/demo_realtime/inc/tansig_table.h:
--------------------------------------------------------------------------------
 1 | /* This file is auto-generated by gen_tables */
 2 | 
 3 | static const float tansig_table[201] = {
 4 | 0.000000f, 0.039979f, 0.079830f, 0.119427f, 0.158649f,
 5 | 0.197375f, 0.235496f, 0.272905f, 0.309507f, 0.345214f,
 6 | 0.379949f, 0.413644f, 0.446244f, 0.477700f, 0.507977f,
 7 | 0.537050f, 0.564900f, 0.591519f, 0.616909f, 0.641077f,
 8 | 0.664037f, 0.685809f, 0.706419f, 0.725897f, 0.744277f,
 9 | 0.761594f, 0.777888f, 0.793199f, 0.807569f, 0.821040f,
10 | 0.833655f, 0.845456f, 0.856485f, 0.866784f, 0.876393f,
11 | 0.885352f, 0.893698f, 0.901468f, 0.908698f, 0.915420f,
12 | 0.921669f, 0.927473f, 0.932862f, 0.937863f, 0.942503f,
13 | 0.946806f, 0.950795f, 0.954492f, 0.957917f, 0.961090f,
14 | 0.964028f, 0.966747f, 0.969265f, 0.971594f, 0.973749f,
15 | 0.975743f, 0.977587f, 0.979293f, 0.980869f, 0.982327f,
16 | 0.983675f, 0.984921f, 0.986072f, 0.987136f, 0.988119f,
17 | 0.989027f, 0.989867f, 0.990642f, 0.991359f, 0.992020f,
18 | 0.992631f, 0.993196f, 0.993718f, 0.994199f, 0.994644f,
19 | 0.995055f, 0.995434f, 0.995784f, 0.996108f, 0.996407f,
20 | 0.996682f, 0.996937f, 0.997172f, 0.997389f, 0.997590f,
21 | 0.997775f, 0.997946f, 0.998104f, 0.998249f, 0.998384f,
22 | 0.998508f, 0.998623f, 0.998728f, 0.998826f, 0.998916f,
23 | 0.999000f, 0.999076f, 0.999147f, 0.999213f, 0.999273f,
24 | 0.999329f, 0.999381f, 0.999428f, 0.999472f, 0.999513f,
25 | 0.999550f, 0.999585f, 0.999617f, 0.999646f, 0.999673f,
26 | 0.999699f, 0.999722f, 0.999743f, 0.999763f, 0.999781f,
27 | 0.999798f, 0.999813f, 0.999828f, 0.999841f, 0.999853f,
28 | 0.999865f, 0.999875f, 0.999885f, 0.999893f, 0.999902f,
29 | 0.999909f, 0.999916f, 0.999923f, 0.999929f, 0.999934f,
30 | 0.999939f, 0.999944f, 0.999948f, 0.999952f, 0.999956f,
31 | 0.999959f, 0.999962f, 0.999965f, 0.999968f, 0.999970f,
32 | 0.999973f, 0.999975f, 0.999977f, 0.999978f, 0.999980f,
33 | 0.999982f, 0.999983f, 0.999984f, 0.999986f, 0.999987f,
34 | 0.999988f, 0.999989f, 0.999990f, 0.999990f, 0.999991f,
35 | 0.999992f, 0.999992f, 0.999993f, 0.999994f, 0.999994f,
36 | 0.999994f, 0.999995f, 0.999995f, 0.999996f, 0.999996f,
37 | 0.999996f, 0.999997f, 0.999997f, 0.999997f, 0.999997f,
38 | 0.999997f, 0.999998f, 0.999998f, 0.999998f, 0.999998f,
39 | 0.999998f, 0.999998f, 0.999999f, 0.999999f, 0.999999f,
40 | 0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f,
41 | 0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f,
42 | 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
43 | 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
44 | 1.000000f,
45 | };
46 | 


--------------------------------------------------------------------------------
/egs/mwdlp_vcc20/demo_realtime/inc/vec.h:
--------------------------------------------------------------------------------
  1 | /* Copyright (c) 2018 Mozilla
  2 |                  2008-2011 Octasic Inc.
  3 |                  2012-2017 Jean-Marc Valin */
  4 | /*
  5 |    Redistribution and use in source and binary forms, with or without
  6 |    modification, are permitted provided that the following conditions
  7 |    are met:
  8 | 
  9 |    - Redistributions of source code must retain the above copyright
 10 |    notice, this list of conditions and the following disclaimer.
 11 | 
 12 |    - Redistributions in binary form must reproduce the above copyright
 13 |    notice, this list of conditions and the following disclaimer in the
 14 |    documentation and/or other materials provided with the distribution.
 15 | 
 16 |    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 17 |    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 18 |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 19 |    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
 20 |    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 21 |    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 22 |    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 23 |    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 24 |    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 25 |    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 26 |    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 27 | */
 28 | /* No AVX2/FMA support */
 29 | /* Modified by Patrick Lumban Tobing (Nagoya University) on Sept.-Dec. 2020,
 30 |    marked by PLT_<Sep/Dec>20 */
 31 | 
 32 | #ifndef VEC_NEON_H
 33 | #define VEC_NEON_H
 34 | 
 35 | //PLT_Dec20
 36 | #include "tansig_table.h"
 37 | 
 38 | static float celt_exp2(float x)
 39 | {
 40 |    int integer;
 41 |    float frac;
 42 |    union {
 43 |       float f;
 44 |       opus_uint32 i;
 45 |    } res;
 46 |    integer = floor(x);
 47 |    if (integer < -50)
 48 |       return 0;
 49 |    frac = x-integer;
 50 |    /* K0 = 1, K1 = log(2), K2 = 3-4*log(2), K3 = 3*log(2) - 2 */
 51 |    res.f = 0.99992522f + frac * (0.69583354f
 52 |            + frac * (0.22606716f + 0.078024523f*frac));
 53 |    res.i = (res.i + (integer<<23)) & 0x7fffffff;
 54 |    return res.f;
 55 | }
 56 | #define celt_exp(x) celt_exp2((x)*1.44269504f)
 57 | 
 58 | static float tansig_approx(float x)
 59 | {
 60 |     int i;
 61 |     float y, dy;
 62 |     float sign=1;
 63 |     if (x<0)
 64 |     {
 65 |        x=-x;
 66 |        sign=-1;
 67 |     }
 68 |     i = (int)floor(.5f+25*x);
 69 |     i = IMAX(0, IMIN(200, i));
 70 |     x -= .04f*i;
 71 |     y = tansig_table[i];
 72 |     dy = 1-y*y;
 73 |     y = y + x*dy*(1 - y*x);
 74 |     return sign*y;
 75 | }
 76 | 
 77 | static OPUS_INLINE float sigmoid_approx(float x)
 78 | {
 79 |    return .5f + .5f*tansig_approx(.5f*x);
 80 | }
 81 | 
 82 | static void softmax(float *y, const float *x, int N)
 83 | {
 84 |     //int i;
 85 |     for (int i=0;i<N;i++)
 86 |         y[i] = celt_exp(x[i]);
 87 | }
 88 | 
 89 | static void vec_exp(float *y, const float *x, int N)
 90 | {
 91 |     //int i;
 92 |     for (int i=0;i<N;i++)
 93 |         y[i] = exp(x[i]);
 94 | }
 95 | 
 96 | static void vec_tanh(float *y, const float *x, int N)
 97 | {
 98 |     //int i;
 99 |     for (int i=0;i<N;i++)
100 |     {
101 |         y[i] = tansig_approx(x[i]);
102 |     }
103 | }
104 | 
105 | //PLT_Feb21
106 | static void vec_tanh_exp(float *y, const float *x, int N)
107 | {
108 |     //int i;
109 |     float ex2;
110 |     for (int i=0;i<N;i++)
111 |     {
112 |         //y[i] = tansig_approx(x[i]);
113 |         ex2 = exp(2*x[i]);
114 |         y[i] = (ex2-1)/(ex2+1);
115 |     }
116 | }
117 | 
118 | //PLT_Sep20
119 | static void vec_tanhshrink(float *y, const float *x, int N)
120 | {
121 |     //int i;
122 |     float ex2;
123 |     for (int i=0;i<N;i++)
124 |     {
125 |         //y[i] = x[i]-tansig_approx(x[i]);
126 |         ex2 = exp(2*x[i]);
127 |         y[i] = x[i]-(ex2-1)/(ex2+1);
128 |     }
129 | }
130 | 
/* Element-wise sigmoid approximation:
   y[i] = sigmoid_approx(x[i]) for i in [0, N). */
static void vec_sigmoid(float *y, const float *x, int N)
{
    int k = 0;
    while (k < N)
    {
        y[k] = sigmoid_approx(x[k]);
        k++;
    }
}
139 | 
/* Dense matrix-vector multiply-accumulate, 16 output rows at a time:
 *
 *   out[r] += sum over c in [0, cols) of weights[c*col_stride + r] * x[c]
 *
 * Rows are processed in groups of 16 starting at 0, 16, 32, ... while the
 * group start is below `rows`; every group touches 16 consecutive elements
 * of `out` and of each weight column, so buffers must be sized accordingly.
 * `out` is accumulated into, not overwritten.
 */
static void sgemv_accum16(float *out, const float *weights, int rows, int cols, int col_stride, const float *x)
{
   for (int row0=0; row0<rows; row0+=16)
   {
      for (int col=0; col<cols; col++)
      {
         /* Column `col`, restricted to the current group of 16 rows. */
         const float * restrict wcol = &weights[col*col_stride + row0];
         float * restrict acc = &out[row0];
         const float scale = x[col];
         for (int k=0; k<16; k++)
            acc[k] += wcol[k]*scale;
      }
   }
}
172 | 
/* Sparse matrix-vector multiply-accumulate, 16 output rows at a time.
 *
 * For each group of 16 rows, `idx` supplies a count followed by that many
 * indices into `x`. For every such index, the next 16 consecutive values of
 * `w` are scaled by the selected x element and added to the group's 16
 * elements of `out`. Both `idx` and `w` are consumed sequentially across
 * groups. `out` is accumulated into, not overwritten.
 */
static void sparse_sgemv_accum16(float *out, const float *w, int rows, const int *idx, const float *x)
{
   for (int row0=0; row0<rows; row0+=16)
   {
      /* Number of 16-value weight blocks stored for this row group. */
      int remaining = *idx++;
      while (remaining-- > 0)
      {
         float * restrict acc = &out[row0];
         const float scale = x[*idx++];
         for (int k=0; k<16; k++)
            acc[k] += w[k]*scale;
         w += 16;
      }
   }
}
206 | 
207 | #endif /* VEC_NEON_H */
208 | 


--------------------------------------------------------------------------------
/egs/mwdlp_vcc20/demo_realtime/inc/wave.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |    Copyright 2021 Patrick Lumban Tobing (Nagoya University)
 3 |    Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
 4 | 
 5 |    WAV file read/write is based on http://truelogic.org/wordpress/2015/09/04/parsing-a-wav-file-in-c
 6 | */
 7 | 
 8 | 
 9 | // WAVE file header format
// In-memory representation of the canonical 44-byte RIFF/WAVE header.
// Fields appear in the same order as the byte layout listed in the table below.
// NOTE(review): matching the on-disk widths assumes a 4-byte unsigned int and
// a 2-byte unsigned short -- confirm for the target platforms.
struct HEADER {
    unsigned char riff[4];                      // "RIFF" chunk id
    unsigned int overall_size;               // RIFF chunk size: overall file size minus 8 bytes
    unsigned char wave[4];                      // "WAVE" format id
    unsigned char fmt_chunk_marker[4];          // "fmt " chunk id (4th byte is a space)
    unsigned int length_of_fmt;                 // length of the format data
    unsigned short format_type;                   // format type. 1-PCM, 3- IEEE float, 6 - 8bit A law, 7 - 8bit mu law
    unsigned short channels;                      // no.of channels
    unsigned int sample_rate;                   // sampling rate (blocks per second)
    unsigned int byterate;                      // SampleRate * NumChannels * BitsPerSample/8
    unsigned short block_align;                   // NumChannels * BitsPerSample/8
    unsigned short bits_per_sample;               // bits per sample, 8- 8bits, 16- 16 bits etc
    unsigned char data_chunk_header [4];        // DATA string or FLLR string
    unsigned int data_size;                     // NumSamples * NumChannels * BitsPerSample/8 - size of the next chunk that will be read
};
25 | 
26 | /*
27 |     Positions   Sample Value    Description
28 |     1 – 4   “RIFF”  Marks the file as a riff file. Characters are each 1 byte long.
29 |     5 – 8   File size (integer)     Size of the overall file – 8 bytes, in bytes (32-bit integer). Typically, you’d fill this in after creation.
30 |     9 -12   “WAVE”  File Type Header. For our purposes, it always equals “WAVE”.
 31 |     13-16   “fmt “  Format chunk marker. Includes trailing space
32 |     17-20   16  Length of format data as listed above
33 |     21-22   1   Type of format (1 is PCM) – 2 byte integer
34 |     23-24   2   Number of Channels – 2 byte integer
 35 |     25-28   44100   Sample Rate – 32-bit integer. Common values are 44100 (CD), 48000 (DAT). Sample Rate = Number of Samples per second, or Hertz.
36 |     29-32   176400  (Sample Rate * BitsPerSample * Channels) / 8.
 37 |     33-34   4   (BitsPerSample * Channels) / 8. 1 – 8 bit mono; 2 – 8 bit stereo/16 bit mono; 4 – 16 bit stereo
38 |     35-36   16  Bits per sample
39 |     37-40   “data”  “data” chunk header. Marks the beginning of the data section.
40 |     41-44   File size (data)    Size of the data section.
41 |     Sample values are given above for a 16-bit stereo source.
42 | 
43 |     It is important to note that the WAV format uses little-endian [LSB in smallest address] format to store bytes,
44 |     so you need to convert the bytes to big-endian [MSB in smallest address] in code for the values to make sense.
45 | */
46 | 
// WAV / feature I/O entry points (declarations only).
// These prototypes use FILE, and this header includes nothing itself, so
// <stdio.h> must be included before it.
// NOTE(review): exact semantics and return-value conventions are defined by
// the implementations, which are not visible from this header -- verify there.
short read_write_wav_band(FILE *fin, FILE *fout, int n_bands);
int read_feat_write_wav_band(FILE* fin, FILE* fout, int bin_flag, int n_bands);
short read_write_wav(FILE *fin, FILE *fout, short *num_reflected_right_edge_samples, long *num_samples, long *size_of_each_sample);
long read_feat_write_wav(FILE* fin, FILE* fout, int bin_flag);
51 | 


--------------------------------------------------------------------------------
/egs/mwdlp_vcc20/download_vcc20.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright 2021 Patrick Lumban Tobing (Nagoya University)
 4 | #  Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
 5 | 
 6 | #http://vc-challenge.org/
 7 | #https://github.com/nii-yamagishilab/VCC2020-database
 8 | 
 9 | wget https://github.com/nii-yamagishilab/VCC2020-database/raw/master/vcc2020_database_training_source.zip
10 | wget https://github.com/nii-yamagishilab/VCC2020-database/raw/master/vcc2020_database_training_target_task1.zip
11 | wget https://github.com/nii-yamagishilab/VCC2020-database/raw/master/vcc2020_database_training_target_task2.zip
12 | wget https://github.com/nii-yamagishilab/VCC2020-database/raw/master/vcc2020_database_evaluation.zip
13 | wget https://github.com/nii-yamagishilab/VCC2020-database/raw/master/vcc2020_database_groundtruth.zip
14 | 
15 | unzip vcc2020_database_training_source.zip
16 | rm -vf vcc2020_database_training_source.zip
17 | unzip vcc2020_database_training_target_task1.zip
18 | rm -vf vcc2020_database_training_target_task1.zip
19 | unzip vcc2020_database_training_target_task2.zip
20 | rm -vf vcc2020_database_training_target_task2.zip
21 | unzip vcc2020_database_evaluation.zip
22 | rm -vf vcc2020_database_evaluation.zip
23 | unzip vcc2020_database_groundtruth.zip
24 | rm -vf vcc2020_database_groundtruth.zip
25 | 
26 | rm -vfr __MACOSX
27 | 
28 | trg_dir=wav_24kHz
29 | 
30 | mkdir -p ${trg_dir}
31 | 
32 | mv -v source/S* ${trg_dir}
33 | mv -v target_task1/T* ${trg_dir}
34 | mv -v target_task2/T* ${trg_dir}
35 | 
36 | mkdir -p ${trg_dir}/test
37 | 
38 | mv -v vcc2020_database_evaluation/S* ${trg_dir}/test
39 | mv -v vcc2020_database_groundtruth/T* ${trg_dir}/test
40 | 
41 | rm -vfr source
42 | rm -vfr target_task1
43 | rm -vfr target_task2
44 | rm -vfr vcc2020_database_evaluation
45 | rm -vfr vcc2020_database_groundtruth
46 | 


--------------------------------------------------------------------------------
/egs/mwdlp_vcc20/get_spk_list.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | #ls wav_24kHz/train > tmp
 4 | ls wav_24kHz > tmp
 5 | a=(`cat tmp`)
 6 | 
 7 | echo ${a[@]}
 8 | echo ${#a[@]}
 9 | 
10 | #ls wav_24kHz_unseen/test > tmp
11 | #a=(`cat tmp`)
12 | #
13 | #echo ${a[@]}
14 | #echo ${#a[@]}
15 | 
16 | rm -f tmp
17 | 


--------------------------------------------------------------------------------
/egs/mwdlp_vcc20/path.sh:
--------------------------------------------------------------------------------
# Environment setup for this recipe. Meant to be sourced: it activates a
# virtualenv and exports variables into the calling shell.
export CUDA_HOME=/usr/local/cuda-11
# Prepend the CUDA libraries. The ${VAR:+...} form emits the ":" separator
# only when LD_LIBRARY_PATH already has a value; a trailing ":" would add an
# empty entry, which the dynamic loader treats as the current directory.
export LD_LIBRARY_PATH=$CUDA_HOME/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
# Relative path: resolved against the directory this file is sourced from.
export PRJ_ROOT=../..
source $PRJ_ROOT/tools/venv/bin/activate
export PATH=$PATH:$PRJ_ROOT/src/bin:$PRJ_ROOT/src/utils
# NOTE: this replaces (does not extend) any PYTHONPATH inherited from the caller.
export PYTHONPATH=$PRJ_ROOT/src/nets:$PRJ_ROOT/src/utils
7 | 


--------------------------------------------------------------------------------
/src/bin/get_model_indices.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Copyright 2021 Patrick Lumban Tobing (Nagoya University)
 4 | #  Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
 5 | 
 6 | from __future__ import division
 7 | from __future__ import print_function
 8 | 
 9 | import argparse
10 | import os
11 | import sys
12 | 
13 | import torch
14 | 
15 | import logging
16 | 
17 | 
def main():
    """Write the last and best iteration indices of a checkpoint to a file.

    Reads ``<expdir>/checkpoint-last.pkl``, takes its ``"iterations"`` value
    and its ``"min_idx"`` value plus one, logs them, and writes both as
    ``"<last> <min_idx+1>\\n"`` to ``<confdir>.idx``.

    Command-line arguments:
        --expdir   directory holding the checkpoint; the log file is written here
        --confdir  path prefix of the output file (".idx" is appended)
        --verbose  1: INFO, >1: DEBUG, otherwise WARN
    """
    parser = argparse.ArgumentParser(
        description="making feature file argsurations.")

    parser.add_argument("--expdir", required=True,
                        type=str, help="directory to save log")
    parser.add_argument("--confdir", required=True,
                        type=str, help="directory of model config.")
    parser.add_argument("--verbose", default=1,
                        type=int, help="log message level")

    args = parser.parse_args()

    os.environ["CUDA_VISIBLE_DEVICES"] = ""

    # set log level (only the level differs between the verbosity settings)
    if args.verbose == 1:
        log_level = logging.INFO
    elif args.verbose > 1:
        log_level = logging.DEBUG
    else:
        log_level = logging.WARN
    logging.basicConfig(level=log_level,
                        format='%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S',
                        filename=args.expdir + "/get_model_indices.log")
    logging.getLogger().addHandler(logging.StreamHandler())
    if args.verbose < 1:
        # logging.warn() is a deprecated alias; logging.warning() is the
        # supported spelling.
        logging.warning("logging is disabled.")

    # NOTE(security): torch.load unpickles the file; only use it on
    # checkpoints from a trusted source.
    checkpoint = torch.load(os.path.join(args.expdir, "checkpoint-last.pkl"), map_location=torch.device("cpu"))
    last_epoch = checkpoint["iterations"]
    min_idx_epoch = checkpoint["min_idx"]+1
    logging.info(args.expdir)
    logging.info(f'{last_epoch} {min_idx_epoch}')

    out_file = args.confdir+".idx"
    logging.info(out_file)
    # The context manager closes the file even if the write raises.
    with open(out_file, 'w') as f:
        f.write('%d %d\n' % (last_epoch, min_idx_epoch))
66 | 
67 | if __name__ == "__main__":
68 |     main()
69 | 


--------------------------------------------------------------------------------
/src/bin/gf_syn.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import numpy as np
 4 | import librosa
 5 | import soundfile as sf
 6 | 
 7 | f = open('melsp.txt', 'r')
 8 | 
 9 | lines = f.readlines()
10 | 
11 | melmagsp = None
12 | for line in lines:
13 |     vals = np.expand_dims(np.array(line.strip().split(' ')).astype(np.float), axis=0)
14 |     if melmagsp is not None:
15 |         melmagsp = np.append(melmagsp, vals, axis=0)
16 |     else:
17 |         melmagsp = vals
18 | #    print(vals)
19 |     print(melmagsp.shape)
20 | 
21 | #fs = 22050
22 | fs = 24000
23 | #fftl = 1024
24 | fftl = 2048
25 | mel_dim = 80
26 | shiftms = 5
27 | #shiftms = 4.9886621315192743764172335600907
28 | #shiftms = 10
29 | #shiftms = 9.9773242630385487528344671201814
30 | winms = 27.5
31 | hop_length = int((fs/1000)*shiftms)
32 | win_length = int((fs/1000)*winms)
33 | 
34 | melfb_t = np.linalg.pinv(librosa.filters.mel(fs, fftl, n_mels=mel_dim))
35 | print(melfb_t.shape)
36 | recmagsp = np.matmul(melfb_t, melmagsp.T)
37 | print(recmagsp.shape)
38 | wav = np.clip(librosa.core.griffinlim(recmagsp, hop_length=hop_length,
39 |             win_length=win_length, window='hann'), -1, 0.999969482421875)
40 | print(wav.shape)
41 | sf.write('melsp_syn.wav', wav, fs, 'PCM_16')
42 | 


--------------------------------------------------------------------------------
/src/bin/min_pow.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | # Copyright 2021 Patrick Lumban Tobing (Nagoya University)
  4 | #  Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
  5 | 
  6 | from __future__ import division
  7 | from __future__ import print_function
  8 | 
  9 | import argparse
 10 | import os
 11 | import sys
 12 | 
 13 | import logging
 14 | 
 15 | import numpy as np
 16 | 
 17 | 
 18 | def main():
 19 |     parser = argparse.ArgumentParser(
 20 |         description="making feature file argsurations.")
 21 | 
 22 |     parser.add_argument("--expdir", required=True,
 23 |                         type=str, help="directory to save log")
 24 |     parser.add_argument("--featdir", required=True,
 25 |                         type=str, help="directory of feature extraction log")
 26 |     parser.add_argument("--confdir", required=True,
 27 |                         type=str, help="directory of speaker config.")
 28 |     parser.add_argument("--spk_list", required=True,
 29 |                         type=str, help="speaker list")
 30 |     parser.add_argument("--verbose", default=1,
 31 |                         type=int, help="log message level")
 32 | 
 33 |     args = parser.parse_args()
 34 | 
 35 |     # set log level
 36 |     if args.verbose == 1:
 37 |         logging.basicConfig(level=logging.INFO,
 38 |                             format='%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s',
 39 |                             datefmt='%m/%d/%Y %I:%M:%S',
 40 |                             filename=args.expdir + "/min_pow.log")
 41 |         logging.getLogger().addHandler(logging.StreamHandler())
 42 |     elif args.verbose > 1:
 43 |         logging.basicConfig(level=logging.DEBUG,
 44 |                             format='%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s',
 45 |                             datefmt='%m/%d/%Y %I:%M:%S',
 46 |                             filename=args.expdir + "/min_pow.log")
 47 |         logging.getLogger().addHandler(logging.StreamHandler())
 48 |     else:
 49 |         logging.basicConfig(level=logging.WARN,
 50 |                             format='%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s',
 51 |                             datefmt='%m/%d/%Y %I:%M:%S',
 52 |                             filename=args.expdir + "/min_pow.log")
 53 |         logging.getLogger().addHandler(logging.StreamHandler())
 54 |         logging.warn("logging is disabled.")
 55 | 
 56 |     spks = args.spk_list.split('@')
 57 |     folder = args.featdir
 58 |     conf = args.confdir
 59 |     logging.info(spks)
 60 |     logging.info(folder)
 61 |     logging.info(conf)
 62 |     for spk in spks:
 63 |         logging.info(spk)
 64 |         in_file = os.path.join(folder,spk+'_npowhistogram.txt')
 65 |         logging.info(in_file)
 66 |         arr_data = np.loadtxt(in_file)
 67 |     
 68 |         length = arr_data.shape[0]
 69 |         peak_1 = -999999999
 70 |         peak_1_idx = 0
 71 |         global_min = 999999999
 72 |         global_min_idx = length // 2 - 1
 73 |         peak_2 = -999999999
 74 |         peak_2_idx = length-1
 75 |         list_min_global_idx = []
 76 |     
 77 |         for i in range(length // 2 - 2):
 78 |             if arr_data[i][1] > peak_1:
 79 |                 peak_1_idx = i
 80 |                 peak_1 = arr_data[i][1]
 81 |         for i in range(length-1,(length - length // 3),-1):
 82 |             if arr_data[i][1] > peak_2:
 83 |                 peak_2_idx = i
 84 |                 peak_2 = arr_data[i][1]
 85 |         for i in range(length):
 86 |             if arr_data[i][1] <= global_min and i > peak_1_idx and i < peak_2_idx:
 87 |                 global_min_idx = i
 88 |                 if arr_data[i][1] == global_min:
 89 |                     list_min_global_idx.append(arr_data[i][0])
 90 |                 else:
 91 |                     list_min_global_idx = []
 92 |                     list_min_global_idx.append(arr_data[i][0])
 93 |                 global_min = arr_data[i][1]
 94 |         min_pow = np.mean(list_min_global_idx)
 95 |     
 96 |         logging.info('%d %d %lf' % (peak_1_idx, arr_data[peak_1_idx][0], peak_1))
 97 |         logging.info('%d %d %lf' % (global_min_idx, arr_data[global_min_idx][0], global_min))
 98 |         logging.info('%d %d %lf' % (peak_2_idx, arr_data[peak_2_idx][0], peak_2))
 99 |         logging.info(list_min_global_idx)
100 |         logging.info(min_pow)
101 |         out_file = os.path.join(conf,spk+'.pow')
102 |         logging.info(out_file)
103 |         f = open(out_file, 'w')
104 |         f.write('%.1f\n' % (min_pow))
105 |         f.close()
106 | 
107 | 
108 | if __name__ == "__main__":
109 |     main()
110 | 


--------------------------------------------------------------------------------
/src/bin/noise_shaping_emph.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | # Copyright 2021 Patrick Lumban Tobing (Nagoya University)
  5 | #  Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
  6 | 
  7 | from __future__ import division
  8 | from __future__ import print_function
  9 | 
 10 | import argparse
 11 | from distutils.util import strtobool
 12 | import multiprocessing as mp
 13 | import os
 14 | import sys
 15 | 
 16 | import numpy as np
 17 | import soundfile as sf
 18 | from scipy.signal import lfilter
 19 | 
 20 | from utils import find_files
 21 | from utils import read_txt
 22 | 
 23 | ##FS = 16000
 24 | #FS = 22050
 25 | FS = 24000
 26 | ##FS = 44100
 27 | ##FS = 48000
 28 | ALPHA = 0.85
 29 | 
 30 | def preemphasis(x, alpha=ALPHA):
 31 |     b = np.array([1., -alpha], x.dtype)
 32 |     a = np.array([1.], x.dtype)
 33 |     return lfilter(b, a, x)
 34 | 
 35 | 
 36 | def deemphasis(x, alpha=ALPHA):
 37 |     b = np.array([1.], x.dtype)
 38 |     a = np.array([1., -alpha], x.dtype)
 39 |     return lfilter(b, a, x)
 40 | 
 41 | 
 42 | def main():
 43 |     parser = argparse.ArgumentParser(
 44 |         description="making feature file argsurations.")
 45 | 
 46 |     parser.add_argument(
 47 |         "--waveforms", default=None,
 48 |         help="directory or list of filename of input wavfile")
 49 |     parser.add_argument(
 50 |         "--writedir", default=None,
 51 |         help="directory to save preprocessed wav file")
 52 |     parser.add_argument(
 53 |         "--fs", default=FS,
 54 |         type=int, help="Sampling frequency")
 55 |     parser.add_argument(
 56 |         "--alpha", default=ALPHA,
 57 |         type=float, help="coefficient of pre-emphasis")
 58 |     parser.add_argument(
 59 |         "--verbose", default=1,
 60 |         type=int, help="log message level")
 61 |     parser.add_argument(
 62 |         '--n_jobs', default=1,
 63 |         type=int, help="number of parallel jobs")
 64 |     parser.add_argument(
 65 |         '--inv', default=False, type=strtobool,
 66 |         help="if True, inverse filtering will be performed")
 67 |     args = parser.parse_args()
 68 | 
 69 |     # read list
 70 |     if os.path.isdir(args.waveforms):
 71 |         file_list = sorted(find_files(args.waveforms, "*.wav"))
 72 |     else:
 73 |         file_list = read_txt(args.waveforms)
 74 | 
 75 |     # check directory existence
 76 |     if not os.path.exists(args.writedir):
 77 |         os.makedirs(args.writedir)
 78 | 
 79 |     def noise_shaping(wav_list):
 80 |         for wav_name in wav_list:
 81 |             # load wavfile and apply low cut filter
 82 |             x, fs = sf.read(wav_name)
 83 | 
 84 |             ## check sampling frequency
 85 |             if not fs == args.fs:
 86 |                 print("ERROR: sampling frequency is not matched.")
 87 |                 sys.exit(1)
 88 | 
 89 |             ## synthesis and write
 90 |             if not args.inv:
 91 |                 x_ns = preemphasis(x, alpha=args.alpha)
 92 |             else:
 93 |                 x_ns = deemphasis(x, alpha=args.alpha)
 94 |             write_name = args.writedir + "/" + os.path.basename(wav_name)
 95 |             sf.write(write_name, np.clip(x_ns, -1, 0.999969482421875), args.fs, 'PCM_16')
 96 | 
 97 |     # divie list
 98 |     file_lists = np.array_split(file_list, args.n_jobs)
 99 |     file_lists = [f_list.tolist() for f_list in file_lists]
100 | 
101 |     # multi processing
102 |     processes = []
103 |     for f in file_lists:
104 |         p = mp.Process(target=noise_shaping, args=(f,))
105 |         p.start()
106 |         processes.append(p)
107 | 
108 |     # wait for all process
109 |     for p in processes:
110 |         p.join()
111 | 
112 | 
113 | if __name__ == "__main__":
114 |     main()
115 | 


--------------------------------------------------------------------------------
/src/bin/proc_wav_pqmf.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | # Copyright 2021 Patrick Lumban Tobing (Nagoya University)
  5 | #  Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
  6 | 
  7 | from __future__ import division
  8 | from __future__ import print_function
  9 | 
 10 | import argparse
 11 | from distutils.util import strtobool
 12 | import multiprocessing as mp
 13 | import os
 14 | import sys
 15 | 
 16 | import numpy as np
 17 | import soundfile as sf
 18 | from scipy.signal import lfilter
 19 | 
 20 | import torch
 21 | 
 22 | from pqmf import PQMF
 23 | 
 24 | from utils import find_files
 25 | from utils import read_txt
 26 | 
 27 | ##FS = 16000
 28 | #FS = 22050
 29 | FS = 24000
 30 | ##FS = 44100
 31 | ##FS = 48000
 32 | ALPHA = 0.85
 33 | N_BANDS = 5
 34 | 
 35 | 
 36 | def deemphasis(x, alpha=ALPHA):
 37 |     b = np.array([1.], x.dtype)
 38 |     a = np.array([1., -alpha], x.dtype)
 39 |     return lfilter(b, a, x)
 40 | 
 41 | 
 42 | def main():
 43 |     parser = argparse.ArgumentParser(
 44 |         description="making feature file argsurations.")
 45 | 
 46 |     parser.add_argument(
 47 |         "--waveforms", default=None,
 48 |         help="directory or list of filename of input wavfile")
 49 |     parser.add_argument(
 50 |         "--writedir", default=None,
 51 |         help="directory to save preprocessed wav file")
 52 |     parser.add_argument(
 53 |         "--writesyndir", default=None,
 54 |         help="directory to save preprocessed wav file")
 55 |     parser.add_argument(
 56 |         "--fs", default=FS,
 57 |         type=int, help="Sampling frequency")
 58 |     parser.add_argument(
 59 |         "--n_bands", default=N_BANDS,
 60 |         type=int, help="number of bands for multiband analysis")
 61 |     parser.add_argument(
 62 |         "--alpha", default=ALPHA,
 63 |         type=float, help="coefficient of pre-emphasis")
 64 |     parser.add_argument(
 65 |         "--verbose", default=1,
 66 |         type=int, help="log message level")
 67 |     parser.add_argument(
 68 |         '--n_jobs', default=1,
 69 |         type=int, help="number of parallel jobs")
 70 |     args = parser.parse_args()
 71 | 
 72 |     # read list
 73 |     if os.path.isdir(args.waveforms):
 74 |         file_list = sorted(find_files(args.waveforms, "*.wav"))
 75 |     else:
 76 |         file_list = read_txt(args.waveforms)
 77 | 
 78 |     # check directory existence
 79 |     if not os.path.exists(args.writedir):
 80 |         os.makedirs(args.writedir)
 81 |     if not os.path.exists(args.writesyndir):
 82 |         os.makedirs(args.writesyndir)
 83 | 
 84 |     def noise_shaping(wav_list):
 85 |         pqmf = PQMF(args.n_bands)
 86 |         print(f'{pqmf.subbands} {pqmf.A} {pqmf.taps} {pqmf.cutoff_ratio} {pqmf.beta}')
 87 |         #fs_band = args.fs // args.n_bands
 88 |         #print(f'{pqmf.subbands} {pqmf.A} {pqmf.taps} {pqmf.cutoff_ratio} {pqmf.beta} {fs_band}')
 89 |         for wav_name in wav_list:
 90 |             x, fs = sf.read(wav_name)
 91 | 
 92 |             ## check sampling frequency
 93 |             if not fs == args.fs:
 94 |                 print("ERROR: sampling frequency is not matched.")
 95 |                 sys.exit(1)
 96 | 
 97 |             x_bands_ana = pqmf.analysis(torch.FloatTensor(x).unsqueeze(0).unsqueeze(0))
 98 |             print(x_bands_ana.shape)
 99 |             x_bands_syn = pqmf.synthesis(x_bands_ana)
100 |             print(x_bands_syn.shape)
101 |             for i in range(args.n_bands):
102 |                 wav = np.clip(x_bands_ana[0,i].data.numpy(), -1, 0.999969482421875)
103 |                 if args.n_bands < 10:
104 |                     wavpath = os.path.join(args.writedir, os.path.basename(wav_name).split(".")[0]+"_B-"+str(i+1)+".wav")
105 |                 else:
106 |                     if i < args.n_bands - 1:
107 |                         wavpath = os.path.join(args.writedir, os.path.basename(wav_name).split(".")[0]+"_B-0"+str(i+1)+".wav")
108 |                     else:
109 |                         wavpath = os.path.join(args.writedir, os.path.basename(wav_name).split(".")[0]+"_B-"+str(i+1)+".wav")
110 |                 print(wavpath)
111 |                 sf.write(wavpath, wav, fs, 'PCM_16')
112 |                 #sf.write(wavpath, wav, fs_band, 'PCM_16')
113 |             wav = np.clip(x_bands_syn[0,0].data.numpy(), -1, 0.999969482421875)
114 |             wav = deemphasis(wav, alpha=args.alpha)
115 |             wavpath = os.path.join(args.writesyndir, os.path.basename(wav_name))
116 |             print(wavpath)
117 |             sf.write(wavpath, wav, fs, 'PCM_16')
118 | 
119 | 
120 |     # divie list
121 |     file_lists = np.array_split(file_list, args.n_jobs)
122 |     file_lists = [f_list.tolist() for f_list in file_lists]
123 | 
124 |     # multi processing
125 |     processes = []
126 |     for f in file_lists:
127 |         p = mp.Process(target=noise_shaping, args=(f,))
128 |         p.start()
129 |         processes.append(p)
130 | 
131 |     # wait for all process
132 |     for p in processes:
133 |         p.join()
134 | 
135 | 
136 | if __name__ == "__main__":
137 |     main()
138 | 


--------------------------------------------------------------------------------
/src/bin/spk_stat.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | # Copyright 2019 Patrick Lumban Tobing (Nagoya University)
  5 | # based on a VC implementation by Kazuhiro Kobayashi (Nagoya University)
  6 | #  Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
  7 | 
  8 | import argparse
  9 | import os
 10 | from pathlib import Path
 11 | import logging
 12 | 
 13 | import matplotlib
 14 | import numpy as np
 15 | from utils import check_hdf5
 16 | from utils import read_hdf5
 17 | from utils import read_txt
 18 | from utils import write_hdf5
 19 | 
 20 | matplotlib.use('Agg')
 21 | import matplotlib.pyplot as plt
 22 | 
 23 | 
 24 | def main():
 25 |     parser = argparse.ArgumentParser()
 26 | 
 27 |     parser.add_argument(
 28 |         "--feats", default=None, required=True,
 29 |         help="name of the list of hdf5 files")
 30 |     parser.add_argument("--expdir", required=True,
 31 |         type=str, help="directory to save the log")
 32 |     parser.add_argument(
 33 |         "--verbose", default=1,
 34 |         type=int, help="log message level")
 35 | 
 36 |     args = parser.parse_args()
 37 | 
 38 |     # set log level
 39 |     if args.verbose == 1:
 40 |         logging.basicConfig(level=logging.INFO,
 41 |                             format='%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s',
 42 |                             datefmt='%m/%d/%Y %I:%M:%S',
 43 |                             filename=args.expdir + "/spk_stat.log")
 44 |         logging.getLogger().addHandler(logging.StreamHandler())
 45 |     elif args.verbose > 1:
 46 |         logging.basicConfig(level=logging.DEBUG,
 47 |                             format='%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s',
 48 |                             datefmt='%m/%d/%Y %I:%M:%S',
 49 |                             filename=args.expdir + "/spk_stat.log")
 50 |         logging.getLogger().addHandler(logging.StreamHandler())
 51 |     else:
 52 |         logging.basicConfig(level=logging.WARN,
 53 |                             format='%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s',
 54 |                             datefmt='%m/%d/%Y %I:%M:%S',
 55 |                             filename=args.expdir + "/spk_stat.log")
 56 |         logging.getLogger().addHandler(logging.StreamHandler())
 57 |         logging.warn("logging is disabled.")
 58 | 
 59 |     filenames = read_txt(args.feats)
 60 |     logging.info("number of training utterances = %d" % len(filenames))
 61 | 
 62 |     npows = np.empty((0))
 63 |     f0s = np.empty((0))
 64 |     # process over all of data
 65 |     for filename in filenames:
 66 |         logging.info(filename)
 67 |         f0 = read_hdf5(filename, "/f0")
 68 |         npow = read_hdf5(filename, "/npow")
 69 |         nonzero_indices = np.nonzero(f0)
 70 |         logging.info(f0[nonzero_indices].shape)
 71 |         logging.info(f0s.shape)
 72 |         f0s = np.concatenate([f0s,f0[nonzero_indices]])
 73 |         logging.info(f0s.shape)
 74 |         logging.info(npows.shape)
 75 |         npows = np.concatenate([npows,npow])
 76 |         logging.info(npows.shape)
 77 | 
 78 |     spkr = os.path.basename(args.feats).split('.')[0].split('-')[-1]
 79 | 
 80 |     plt.rcParams["figure.figsize"] = (20,11.25) #1920x1080
 81 | 
 82 |     # create a histogram to visualize F0 range of the speaker
 83 |     f0histogrampath = os.path.join(args.expdir, spkr + '_f0histogram.png')
 84 |     f0hist, f0bins, _ = plt.hist(f0s, bins=500, range=(50, 550),
 85 |         density=True, histtype="stepfilled")
 86 |     # plot with matplotlib
 87 |     plt.xlabel('Fundamental frequency [Hz]')
 88 |     plt.ylabel("Probability")
 89 |     plt.xticks(np.arange(50, 551, 10), rotation=45)
 90 |     figure_dir = os.path.dirname(f0histogrampath)
 91 |     if not os.path.exists(figure_dir):
 92 |         os.makedirs(figure_dir)
 93 |     plt.savefig(f0histogrampath)
 94 |     plt.close()
 95 |     # save values to txt
 96 |     f0histogrampath = os.path.join(args.expdir, spkr + '_f0histogram.txt')
 97 |     f = open(f0histogrampath, 'w')
 98 |     for i in range(f0hist.shape[0]):
 99 |         f.write('%d %.9f\n' % (f0bins[i], f0hist[i]))
100 |     f.close()
101 | 
102 |     # create a histogram to visualize npow range of the speaker
103 |     npowhistogrampath = os.path.join(args.expdir, spkr + '_npowhistogram.png')
104 |     npowhist, npowbins, _ = plt.hist(npows, bins=120, range=(-50, 10),
105 |         density=True, histtype="stepfilled")
106 |     # plot with matplotlib
107 |     plt.xlabel('Frame power [dB]')
108 |     plt.ylabel("Probability")
109 |     plt.xticks(np.arange(-50, 11, 1), rotation=45)
110 |     figure_dir = os.path.dirname(npowhistogrampath)
111 |     if not os.path.exists(figure_dir):
112 |         os.makedirs(figure_dir)
113 |     plt.savefig(npowhistogrampath)
114 |     plt.close()
115 |     # save values to txt
116 |     npowhistogrampath = os.path.join(args.expdir, spkr + '_npowhistogram.txt')
117 |     f = open(npowhistogrampath, 'w')
118 |     for i in range(npowhist.shape[0]):
119 |         f.write('%.1f %.9f\n' % (npowbins[i], npowhist[i]))
120 |     f.close()
121 | 
122 | 
123 | if __name__ == '__main__':
124 |     main()
125 | 


--------------------------------------------------------------------------------
/src/nets/pqmf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | # Copyright 2020 Tomoki Hayashi
  4 | #  MIT License (https://opensource.org/licenses/MIT)
  5 | # Modified by Patrick Lumban Tobing on August 2020
  6 | 
  7 | """Pseudo QMF modules."""
  8 | 
  9 | import numpy as np
 10 | import torch
 11 | import torch.nn.functional as F
 12 | 
 13 | from scipy.signal import kaiser
 14 | 
 15 | 
 16 | def design_prototype_filter(taps=64, cutoff_ratio=0.15, beta=10.06126):
 17 |     """Design prototype filter for PQMF.
 18 | 
 19 |     This method is based on `A Kaiser window approach for the design of prototype
 20 |     filters of cosine modulated filterbanks`_.
 21 | 
 22 |     Args:
 23 |         taps (int): The number of filter taps.
 24 |         cutoff_ratio (float): Cut-off frequency ratio.
 25 |         beta (float): Beta coefficient for kaiser window.
 26 | 
 27 |     Returns:
 28 |         ndarray: Impluse response of prototype filter (taps + 1,).
 29 | 
 30 |     .. _`A Kaiser window approach for the design of prototype filters of cosine modulated filterbanks`:
 31 |         https://ieeexplore.ieee.org/abstract/document/681427
 32 | 
 33 |     """
 34 |     # check the arguments are valid
 35 |     assert taps % 2 == 0, "The number of taps mush be even number."
 36 |     assert 0.0 < cutoff_ratio < 1.0, "Cutoff ratio must be > 0.0 and < 1.0."
 37 | 
 38 |     # make initial filter
 39 |     omega_c = np.pi * cutoff_ratio
 40 |     with np.errstate(invalid='ignore'):
 41 |         h_i = np.sin(omega_c * (np.arange(taps + 1) - 0.5 * taps)) \
 42 |             / (np.pi * (np.arange(taps + 1) - 0.5 * taps))
 43 |     h_i[taps // 2] = np.cos(0) * cutoff_ratio  # fix nan due to indeterminate form
 44 | 
 45 |     # apply kaiser window
 46 |     w = kaiser(taps + 1, beta)
 47 |     h = h_i * w
 48 | 
 49 |     return h
 50 | 
 51 | 
 52 | class PQMF(torch.nn.Module):
 53 |     """PQMF module.
 54 | 
 55 |     This module is based on `Near-perfect-reconstruction pseudo-QMF banks`_.
 56 | 
 57 |     .. _`Near-perfect-reconstruction pseudo-QMF banks`:
 58 |         https://ieeexplore.ieee.org/document/258122
 59 | 
 60 |     """
 61 | 
 62 |     def __init__(self, subbands=4):
 63 |         """Initilize PQMF module.
 64 | 
 65 |         Args:
 66 |             subbands (int): The number of subbands.
 67 | 
 68 |         """
 69 |         super(PQMF, self).__init__()
 70 | 
 71 |         self.subbands = subbands
 72 |         # Kaiser parameters calculation
 73 |         #self.err = 1e-5 # passband ripple
 74 |         #self.err = 1e-10 # passband ripple
 75 |         self.err = 1e-20 # passband ripple
 76 |         self.A = -20*np.log10(self.err)  # attenuation in stopband [dB]
 77 |         self.taps = int((self.A-8)/(2.285*(0.8/self.subbands)*np.pi)) # (0.8/subbands * pi) is the width of band-transition
 78 |         if self.taps % 2 != 0:
 79 |             self.taps += 1
 80 |         self.cutoff_ratio = round(0.6/self.subbands, 4)
 81 |         self.beta = round(0.1102*(self.A-8.7), 5)
 82 |         #print(f'{subbands} {err} {A} {taps} {cutoff_ratio} {beta}')
 83 | 
 84 |         # define filter coefficient
 85 |         h_proto = design_prototype_filter(self.taps, self.cutoff_ratio, self.beta)
 86 |         # n_bands x (taps+1)
 87 |         h_analysis = np.zeros((self.subbands, len(h_proto)))
 88 |         h_synthesis = np.zeros((self.subbands, len(h_proto)))
 89 |         for k in range(self.subbands):
 90 |             h_analysis[k] = 2 * h_proto * np.cos(
 91 |                 (2 * k + 1) * (np.pi / (2 * self.subbands)) *
 92 |                 (np.arange(self.taps + 1) - ((self.taps - 1) / 2)) +
 93 |                 (-1) ** k * np.pi / 4)
 94 |             h_synthesis[k] = 2 * h_proto * np.cos(
 95 |                 (2 * k + 1) * (np.pi / (2 * self.subbands)) *
 96 |                 (np.arange(self.taps + 1) - ((self.taps - 1) / 2)) -
 97 |                 (-1) ** k * np.pi / 4)
 98 | 
 99 |         # convert to tensor
100 |         # out x in x kernel --> weight shape of Conv1d pytorch
101 |         analysis_filter = torch.from_numpy(h_analysis).float().unsqueeze(1) # n_bands x 1 x (taps+1)
102 |         synthesis_filter = torch.from_numpy(h_synthesis).float().unsqueeze(0) # 1 x n_bands x (taps+1)
103 | 
104 |         # register coefficients as beffer
105 |         self.register_buffer("analysis_filter", analysis_filter)
106 |         self.register_buffer("synthesis_filter", synthesis_filter)
107 | 
108 |         ## filter for downsampling & upsampling
109 |         # down/up-sampling filter is used in the multiband domain, hence out=in=n_bands
110 |         updown_filter = torch.zeros((self.subbands, self.subbands, self.subbands)).float()
111 |         for k in range(self.subbands):
112 |             updown_filter[k, k, 0] = 1.0 #only the 1st kernel, i.e., zero to the other right samples
113 |         self.register_buffer("updown_filter", updown_filter)
114 | 
115 |         # keep padding info
116 |         self.pad_fn = torch.nn.ConstantPad1d(self.taps // 2, 0.0)
117 | 
118 |     def analysis(self, x):
119 |         """Analysis with PQMF.
120 | 
121 |         Args:
122 |             x (Tensor): Input tensor (B, 1, T).
123 | 
124 |         Returns:
125 |             Tensor: Output tensor (B, subbands, T // subbands).
126 | 
127 |         """
128 |         # B x 1 x T --> B x n_bands x T
129 |         x = F.conv1d(self.pad_fn(x), self.analysis_filter)
130 |         # B x n_bands x T --> B x n_bands x (T//n_bands) [discard the 2nd-nth indices every n index]
131 |         return F.conv1d(x, self.updown_filter, stride=self.subbands)
132 | 
133 |     def synthesis(self, x):
134 |         """Synthesis with PQMF.
135 | 
136 |         Args:
137 |             x (Tensor): Input tensor (B, subbands, T // subbands).
138 | 
139 |         Returns:
140 |             Tensor: Output tensor (B, 1, T).
141 | 
142 |         """
143 |         # NOTE(kan-bayashi): Power will be dreased so here multipy by # subbands.
144 |         #   Not sure this is the correct way, it is better to check again.
145 |         # TODO(kan-bayashi): Understand the reconstruction procedure
146 |         # B x n_bands x (T//n_bands) --> B x n_bands x T 
147 |         # [zeroing the 2nd-nth indices every n index, and multiply by n_bands at each 1st index]
148 |         x = F.conv_transpose1d(x, self.updown_filter * self.subbands, stride=self.subbands)
149 |         # B x n_bands x T --> B x 1 x T
150 |         return F.conv1d(self.pad_fn(x), self.synthesis_filter)
151 | 


--------------------------------------------------------------------------------
/src/utils/parse_options.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright 2012  Johns Hopkins University (Author: Daniel Povey);
 4 | #                 Arnab Ghoshal, Karel Vesely
 5 | 
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #  http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
14 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
15 | # MERCHANTABLITY OR NON-INFRINGEMENT.
16 | # See the Apache 2 License for the specific language governing permissions and
17 | # limitations under the License.
18 | 
19 | 
20 | # Parse command-line options.
21 | # To be sourced by another script (as in ". parse_options.sh").
22 | # Option format is: --option-name arg
23 | # and shell variable "option_name" gets set to value "arg."
24 | # The exception is --help, which takes no arguments, but prints the 
25 | # $help_message variable (if defined).
26 | 
27 | 
28 | ###
29 | ### The --config file options have lower priority to command line 
30 | ### options, so we need to import them first...
31 | ###
32 | 
# Now import all the configs specified by command-line, in left-to-right order.
# Only positions 1 .. $#-1 are scanned, since --config needs a value after it.
for ((argpos=1; argpos<$#; argpos++)); do
  if [ "${!argpos}" == "--config" ]; then
    argpos_plus1=$((argpos+1))
    config=${!argpos_plus1}
    # The path is quoted so that config names containing spaces or glob
    # characters are tested and sourced as a single word, and so that an
    # empty value is reported as a missing config.
    [ ! -r "$config" ] && echo "$0: missing config '$config'" && exit 1
    . "$config"  # source the config file.
  fi
done
42 | 
43 | 
###
### Now we process the command line options
###
# Each pass of the loop looks at the first positional parameter.  A
# "--name value" pair is consumed with "shift 2"; the loop stops at an empty
# first argument or at the first argument that does not start with "--".
while true; do
  [ -z "${1:-}" ] && break;  # break if there are no arguments
  case "$1" in
    # If the enclosing script is called with --help option, print the help 
    # message and exit.  Scripts should put help messages in $help_message
  --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2;
	  else printf "$help_message\n" 1>&2 ; fi; 
	  exit 0 ;; 
    # The "--name=value" form is rejected; only "--name value" is accepted.
  --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'"
       exit 1 ;;
    # If the first command-line argument begins with "--" (e.g. --foo-bar), 
    # then work out the variable name as $name, which will equal "foo_bar".
  --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`; 
    # Next we test whether the variable in question is undefined-- if so it's 
    # an invalid option and we die.  Note: $0 evaluates to the name of the 
    # enclosing script.
    # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar
    # is undefined.  We then have to wrap this test inside "eval" because 
    # foo_bar is itself inside a variable ($name).
      eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;
      
    # Remember the value the variable had before it is overwritten below.
      oldval="`eval echo \\$$name`";
    # Work out whether we seem to be expecting a Boolean argument.
      if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then 
	was_bool=true;
      else 
	was_bool=false;
      fi

    # Set the variable to the right value-- the escaped quotes make it work if
    # the option had spaces, like --cmd "queue.pl -sync y"
      eval $name=\"$2\"; 
        
    # Check that Boolean-valued arguments are really Boolean.
      if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
        exit 1;
      fi
    # Drop the option name and its value, then look at the next argument.
      shift 2;
      ;;
    # Anything not starting with "--" ends option parsing; it is left in
    # place as a positional argument for the enclosing script.
  *) break;
  esac
done


# Check for an empty argument to the --cmd option, which can easily occur as a 
# result of scripting errors.
# ${cmd+xxx} is non-empty only when cmd is set, so this fires when cmd is set
# but holds the empty string.
[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1;


true; # so this script returns exit code 0.
98 | 


--------------------------------------------------------------------------------
/src/utils/utils.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | # Copyright 2017 Tomoki Hayashi (Nagoya University)
  4 | #  Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
  5 | 
  6 | from __future__ import division
  7 | from __future__ import print_function
  8 | 
  9 | import fnmatch
 10 | import os
 11 | import sys
 12 | import threading
 13 | 
 14 | import h5py
 15 | import numpy as np
 16 | 
 17 | 
 18 | def check_hdf5(hdf5_name, hdf5_path):
 19 |     """FUNCTION TO CHECK HDF5 EXISTENCE
 20 | 
 21 |     Args:
 22 |         hdf5_name (str): filename of hdf5 file
 23 |         hdf5_path (str): dataset name in hdf5 file
 24 | 
 25 |     Return:
 26 |         (bool): dataset exists then return true
 27 |     """
 28 |     if not os.path.exists(hdf5_name):
 29 |         return False
 30 |     else:
 31 |         with h5py.File(hdf5_name, "r") as f:
 32 |             if hdf5_path in f:
 33 |                 return True
 34 |             else:
 35 |                 return False
 36 | 
 37 | 
 38 | def read_hdf5(hdf5_name, hdf5_path):
 39 |     """FUNCTION TO READ HDF5 DATASET
 40 | 
 41 |     Args:
 42 |         hdf5_name (str): filename of hdf5 file
 43 |         hdf5_path (str): dataset name in hdf5 file
 44 | 
 45 |     Return:
 46 |         dataset values
 47 |     """
 48 |     if not os.path.exists(hdf5_name):
 49 |         print("ERROR: There is no such a hdf5 file. (%s)" % hdf5_name)
 50 |         print("Please check the hdf5 file path.")
 51 |         sys.exit(-1)
 52 | 
 53 |     hdf5_file = h5py.File(hdf5_name, "r")
 54 | 
 55 |     if hdf5_path not in hdf5_file:
 56 |         print("ERROR: There is no such a data in hdf5 file. (%s)" % hdf5_path)
 57 |         print("Please check the data path in hdf5 file.")
 58 |         sys.exit(-1)
 59 | 
 60 |     #hdf5_data = hdf5_file[hdf5_path].value #deprecated
 61 |     hdf5_data = hdf5_file[hdf5_path][()]
 62 |     hdf5_file.close()
 63 | 
 64 |     return hdf5_data
 65 | 
 66 | 
 67 | def shape_hdf5(hdf5_name, hdf5_path):
 68 |     """FUNCTION TO GET HDF5 DATASET SHAPE
 69 | 
 70 |     Args:
 71 |         hdf5_name (str): filename of hdf5 file
 72 |         hdf5_path (str): dataset name in hdf5 file
 73 | 
 74 |     Return:
 75 |         (tuple): shape of dataset
 76 |     """
 77 |     if check_hdf5(hdf5_name, hdf5_path):
 78 |         with h5py.File(hdf5_name, "r") as f:
 79 |             hdf5_shape = f[hdf5_path].shape
 80 |         return hdf5_shape
 81 |     else:
 82 |         print("There is no such a file or dataset")
 83 |         sys.exit(-1)
 84 | 
 85 | 
 86 | def write_hdf5(hdf5_name, hdf5_path, write_data, is_overwrite=True):
 87 |     """FUNCTION TO WRITE DATASET TO HDF5
 88 | 
 89 |     Args :
 90 |         hdf5_name (str): hdf5 dataset filename
 91 |         hdf5_path (str): dataset path in hdf5
 92 |         write_data (ndarray): data to write
 93 |         is_overwrite (bool): flag to decide whether to overwrite dataset
 94 |     """
 95 |     # convert to numpy array
 96 |     write_data = np.array(write_data)
 97 | 
 98 |     # check folder existence
 99 |     folder_name, _ = os.path.split(hdf5_name)
100 |     if not os.path.exists(folder_name) and len(folder_name) != 0:
101 |         os.makedirs(folder_name)
102 | 
103 |     # check hdf5 existence
104 |     if os.path.exists(hdf5_name):
105 |         # if already exists, open with r+ mode
106 |         hdf5_file = h5py.File(hdf5_name, "r+")
107 |         # check dataset existence
108 |         if hdf5_path in hdf5_file:
109 |             if is_overwrite:
110 |                 print("Warning: data in hdf5 file already exists. recreate dataset in hdf5.")
111 |                 hdf5_file.__delitem__(hdf5_path)
112 |             else:
113 |                 print("ERROR: there is already dataset.")
114 |                 print("if you want to overwrite, please set is_overwrite = True.")
115 |                 hdf5_file.close()
116 |                 sys.exit(1)
117 |     else:
118 |         # if not exists, open with w mode
119 |         hdf5_file = h5py.File(hdf5_name, "w")
120 | 
121 |     # write data to hdf5
122 |     hdf5_file.create_dataset(hdf5_path, data=write_data)
123 |     hdf5_file.flush()
124 |     hdf5_file.close()
125 | 
126 |     return 1
127 | 
128 | 
def find_files(directory, pattern="*.wav", use_dir_name=True):
    """FUNCTION TO FIND FILES RECURSIVELY

    Symbolic links to directories are followed.

    Args:
        directory (str): root directory to find
        pattern (str): query to find
        use_dir_name (bool): if False, directory name is not included,
            i.e. the returned paths are relative to directory

    Return:
        (list): list of found filenames
    """
    files = []
    for root, _, filenames in os.walk(directory, followlinks=True):
        for filename in fnmatch.filter(filenames, pattern):
            files.append(os.path.join(root, filename))
    if not use_dir_name:
        # Strip only the leading directory.  A plain string replace would
        # also remove later occurrences of the same text inside the path and
        # would strip nothing when directory ends with a path separator.
        files = [os.path.relpath(file_, directory) for file_ in files]
    return files
147 | 
148 | 
def read_txt(file_list):
    """Read a text file into a list of lines without newline characters.

    Arg:
        file_list (str): txt file filename

    Return:
        (list): list of read lines
    """
    with open(file_list, "r") as f:
        # iterate the file line by line and drop the newline of each entry
        return [line.replace("\n", "") for line in f]
161 | 
162 | 
class BackgroundGenerator(threading.Thread):
    """BACKGROUND GENERATOR

    Runs a generator in a daemon thread and hands its items to the consumer
    through a bounded queue, so that up to max_prefetch items are produced
    ahead of consumption.

    reference:
        https://stackoverflow.com/questions/7323664/python-generator-pre-fetch

    Args:
        generator (object): generator instance
        max_prefetch (int): max number of prefetch
    """

    # Unique end-of-stream marker.  A dedicated object (instead of None)
    # lets the wrapped generator yield None without ending the iteration.
    _END_OF_STREAM = object()

    def __init__(self, generator, max_prefetch=1):
        threading.Thread.__init__(self)
        if sys.version_info.major == 2:
            from Queue import Queue
        else:
            from queue import Queue
        self.queue = Queue(max_prefetch)
        self.generator = generator
        # state must exist before the thread starts producing
        self._bg_error = None        # exception raised by the generator, if any
        self._bg_exhausted = False   # set once the end marker has been consumed
        self.daemon = True
        self.start()

    def run(self):
        """Producer side: push every item, then the end-of-stream marker."""
        try:
            for item in self.generator:
                self.queue.put(item)
        except Exception as error:
            # keep the failure so the consumer can re-raise it instead of
            # waiting forever on an empty queue
            self._bg_error = error
        finally:
            self.queue.put(self._END_OF_STREAM)

    def next(self):
        """Consumer side: return the next item.

        Raises:
            StopIteration: when the generator is exhausted (also on every
                later call).
            Exception: the exception raised inside the generator, re-raised
                once at the position where the stream broke.
        """
        if self._bg_exhausted:
            # the end marker is queued only once; do not block on the queue again
            raise StopIteration
        next_item = self.queue.get()
        if next_item is self._END_OF_STREAM:
            self._bg_exhausted = True
            if self._bg_error is not None:
                raise self._bg_error
            raise StopIteration
        return next_item

    def __next__(self):
        return self.next()

    def __iter__(self):
        return self
201 | 
202 | 
class background(object):
    """BACKGROUND GENERATOR DECORATOR

    Wraps a generator function so that each call returns a
    BackgroundGenerator running the generator in a background thread.

    Args:
        max_prefetch (int): max number of prefetch, forwarded to
            BackgroundGenerator
    """

    def __init__(self, max_prefetch=1):
        self.max_prefetch = max_prefetch

    def __call__(self, gen):
        def bg_generator(*args, **kwargs):
            # forward the configured queue size; without it the decorator
            # argument would be silently ignored
            return BackgroundGenerator(gen(*args, **kwargs),
                                       max_prefetch=self.max_prefetch)
        return bg_generator
213 | 


--------------------------------------------------------------------------------
/tools/Makefile:
--------------------------------------------------------------------------------
 1 | PYTHON_VERSION_MIN=3.6
 2 | PYTHON_VERSION_MAX=3.9
 3 | PYTHON_VERSION=$(shell python3 -c "import sys;t='{v[0]}.{v[1]}'.format(v=list(sys.version_info[:2]));sys.stdout.write(t)")
 4 | PYTHON_VERSION_OK=$(shell python3 -c 'import sys; print(int(float(${PYTHON_VERSION}) >= ${PYTHON_VERSION_MIN} and float(${PYTHON_VERSION}) <= ${PYTHON_VERSION_MAX}))')
 5 | 
 6 | #$(info $$PYTHON_VERSION_OK is [${PYTHON_VERSION_OK}])
 7 | 
 8 | ifeq ($(PYTHON_VERSION_OK),0)
 9 |   $(error "Need python $(PYTHON_VERSION) >= $(PYTHON_VERSION_MIN)")
10 | endif
11 | PYTHON_VERSION_WITHOUT_DOT = $(strip $(subst .,,$(PYTHON_VERSION)))
12 | 
13 | PYTHON_VERSION_38=$(shell python3 -c 'import sys; print(int(float(${PYTHON_VERSION}) >= 3.8))')
14 | 
15 | #$(info $$PYTHON_VERSION_38 is [${PYTHON_VERSION_38}])
16 | 
17 | ifeq ($(PYTHON_VERSION_38),0)
18 |     PYTHON_VERSION_WITHOUT_DOT_M=$(PYTHON_VERSION_WITHOUT_DOT)m
19 | else
20 |     PYTHON_VERSION_WITHOUT_DOT_M=$(PYTHON_VERSION_WITHOUT_DOT)
21 | endif
22 | 
23 | #$(info $$PYTHON_VERSION_WITHOUT_DOT_M is [${PYTHON_VERSION_WITHOUT_DOT_M}])
24 | 
25 | CUDA_VERSION := 11.1
26 | CUDA_VERSION_WITHOUT_DOT = $(strip $(subst .,,$(CUDA_VERSION)))
27 | 
28 | PYTORCH_VERSION := 1.8.1
29 | TORCHVISION_VERSION := 0.9.1
30 | TORCHAUDIO_VERSION := 0.8.1
31 | 
# None of these targets names a file that its recipe creates, so all of them
# are phony; otherwise a stray file called "activate" or "torch" would make
# the corresponding target look up to date.
.PHONY: all activate torch clean

all: activate torch

# Create the virtualenv (once) and install the Python requirements into it.
# The steps are chained with && so that a failing install stops the recipe
# instead of being masked by the commands that follow it.
activate:
	test -d venv || virtualenv -p python$(PYTHON_VERSION) venv
	. venv/bin/activate && pip install pip --upgrade && pip install -r requirements.txt && pip install matplotlib
	touch venv/bin/activate

# Install the pinned CUDA wheels of torch / torchvision / torchaudio plus
# torch-optimizer into the virtualenv; any failing step aborts the recipe.
torch: activate
	. venv/bin/activate && pip install pip --upgrade && \
	    pip install https://download.pytorch.org/whl/cu$(CUDA_VERSION_WITHOUT_DOT)/torch-$(PYTORCH_VERSION)%2Bcu$(CUDA_VERSION_WITHOUT_DOT)-cp$(PYTHON_VERSION_WITHOUT_DOT)-cp$(PYTHON_VERSION_WITHOUT_DOT_M)-linux_x86_64.whl && \
	    pip install torch-optimizer && \
	    pip install https://download.pytorch.org/whl/cu$(CUDA_VERSION_WITHOUT_DOT)/torchvision-$(TORCHVISION_VERSION)%2Bcu$(CUDA_VERSION_WITHOUT_DOT)-cp$(PYTHON_VERSION_WITHOUT_DOT)-cp$(PYTHON_VERSION_WITHOUT_DOT_M)-linux_x86_64.whl && \
	    pip install https://download.pytorch.org/whl/torchaudio-${TORCHAUDIO_VERSION}-cp${PYTHON_VERSION_WITHOUT_DOT}-cp${PYTHON_VERSION_WITHOUT_DOT_M}-linux_x86_64.whl

# Remove the virtualenv and compiled Python files below this directory.
clean:
	rm -fr venv
	find . -iname "*.pyc" -delete
50 | 
51 | 


--------------------------------------------------------------------------------
/tools/requirements.txt:
--------------------------------------------------------------------------------
 1 | numpy
 2 | h5py
 3 | scipy
 4 | scikit-learn
 5 | scikit-image
 6 | soundfile
 7 | pysptk
 8 | pyworld
 9 | numba==0.48
10 | librosa
11 | dtw-c>=1.0.2
12 | six
13 | yq
14 | tensorboardX
15 | 


--------------------------------------------------------------------------------