├── asr1
│   ├── conf
│   │   ├── pitch.conf
│   │   ├── fbank.conf
│   │   ├── mfcc.conf
│   │   ├── decode_rnn.yaml
│   │   ├── decode_transformer.yaml
│   │   ├── decode_pytorch_transformer_large.yaml
│   │   ├── specaug.yaml
│   │   ├── lm.yaml
│   │   ├── train_pytorch_transformer_large_ngpu4.yaml
│   │   ├── train_rnn.yaml
│   │   ├── train_transformer.yaml
│   │   └── train_pytorch_conformer_lr5.yaml
│   ├── setup_experiment.sh
│   ├── path.sh
│   ├── cmd.sh
│   ├── recog_wav.sh
│   ├── local
│   │   ├── data_prep.py
│   │   └── .ipynb_checkpoints
│   │       └── data_prep-checkpoint.py
│   ├── run.sh
│   └── .ipynb_checkpoints
│       └── run-checkpoint.sh
└── README.md

--------------------------------------------------------------------------------
/asr1/conf/pitch.conf:
--------------------------------------------------------------------------------
--sample-frequency=16000

--------------------------------------------------------------------------------
/asr1/conf/fbank.conf:
--------------------------------------------------------------------------------
--sample-frequency=16000
--num-mel-bins=80

--------------------------------------------------------------------------------
/asr1/conf/mfcc.conf:
--------------------------------------------------------------------------------
--use-energy=false       # only non-default option.
--sample-frequency=16000

--------------------------------------------------------------------------------
/asr1/conf/decode_rnn.yaml:
--------------------------------------------------------------------------------
lm-weight: 1.0
beam-size: 30
penalty: 0.0
maxlenratio: 0.0
minlenratio: 0.0
ctc-weight: 0.3

--------------------------------------------------------------------------------
/asr1/conf/decode_transformer.yaml:
--------------------------------------------------------------------------------
batchsize: 0
beam-size: 10
penalty: 0.0
maxlenratio: 0.0
minlenratio: 0.0
ctc-weight: 0.3
lm-weight: 1.0

--------------------------------------------------------------------------------
/asr1/conf/decode_pytorch_transformer_large.yaml:
--------------------------------------------------------------------------------
batchsize: 0
beam-size: 60
ctc-weight: 0.4
lm-weight: 0.6
maxlenratio: 0.0
minlenratio: 0.0
penalty: 0.0

--------------------------------------------------------------------------------
/asr1/conf/specaug.yaml:
--------------------------------------------------------------------------------
process:
    # these two masking processes implement SpecAugment (time warping is omitted)
    - type: "freq_mask"
      F: 30
      n_mask: 2
      inplace: true
      replace_with_zero: false
    - type: "time_mask"
      T: 40
      n_mask: 2
      inplace: true
      replace_with_zero: false

--------------------------------------------------------------------------------
/asr1/setup_experiment.sh:
--------------------------------------------------------------------------------
#!/bin/bash

if [ $# -ne 1 ]; then
    echo >&2 "Usage: ./setup_experiment.sh <expname>"
    exit 1;
fi

expname=$1
cd ..
mkdir ${expname}
cd ${expname}

cp ../asr1/{cmd,path,run,recog_wav}.sh .
cp -P ../asr1/steps .
cp -P ../asr1/utils .
ln -s ../asr1/local .
ln -s ../asr1/conf .
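
A minimal usage sketch for setup_experiment.sh (the experiment name "my_exp" and the recipe path are hypothetical):

    $ cd egs/<corpus>/asr1
    $ ./setup_experiment.sh my_exp
    $ cd ../my_exp && ./run.sh

The driver scripts (cmd.sh, path.sh, run.sh, recog_wav.sh) are copied so they can be edited per experiment; steps and utils are copied with -P so Kaldi-style symlinks are preserved rather than dereferenced; and local and conf are symlinked so data preparation and configuration stay shared across experiments.
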
--------------------------------------------------------------------------------
/asr1/conf/lm.yaml:
--------------------------------------------------------------------------------
layer: 2         # 2 for character LMs
unit: 650        # 650 for character LMs
opt: adam        # adam for character LMs
sortagrad: 0     # Feed samples from shortest to longest; -1: enabled for all epochs, 0: disabled, N: enabled for the first N epochs
batchsize: 1024  # 1024 for character LMs
epoch: 100       # number of epochs
patience: 0
maxlen: 150      # 150 for character LMs

--------------------------------------------------------------------------------
/asr1/conf/train_pytorch_transformer_large_ngpu4.yaml:
--------------------------------------------------------------------------------
# This configuration requires 4 GPUs with 12 GB of memory each
accum-grad: 4
adim: 512
aheads: 8
backend: pytorch
batch-bins: 15000000
dlayers: 6
dropout-rate: 0.1
dunits: 2048
elayers: 12
epochs: 120
eunits: 2048
grad-clip: 5
lsm-weight: 0.1
model-module: espnet.nets.pytorch_backend.e2e_asr_transformer:E2E
mtlalpha: 0.3
opt: noam
patience: 0
sortagrad: 0
transformer-attn-dropout-rate: 0.0
transformer-init: pytorch
transformer-input-layer: conv2d
transformer-length-normalized-loss: false
transformer-lr: 10.0
transformer-warmup-steps: 25000

--------------------------------------------------------------------------------
/asr1/path.sh:
--------------------------------------------------------------------------------
MAIN_ROOT=$PWD/../../..
KALDI_ROOT=$MAIN_ROOT/tools/kaldi

[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sctk/bin:$PWD:$PATH
[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
. $KALDI_ROOT/tools/config/common_path.sh
export LC_ALL=C

export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe_v2.5/:$PWD:$PATH

export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$MAIN_ROOT/tools/chainer_ctc/ext/warp-ctc/build
if [ -e $MAIN_ROOT/tools/venv/etc/profile.d/conda.sh ]; then
    source $MAIN_ROOT/tools/venv/etc/profile.d/conda.sh && conda deactivate && conda activate
else
    source $MAIN_ROOT/tools/venv/bin/activate
fi
export PATH=$MAIN_ROOT/utils:$MAIN_ROOT/espnet/bin:$PATH

export OMP_NUM_THREADS=1

# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
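
In a typical ESPnet1 recipe these pieces come together at the top of run.sh: path.sh puts the Kaldi and ESPnet binaries on PATH and activates the Python environment, cmd.sh selects the job dispatcher, and the YAML files are handed to the training and decoding entry points. A hedged sketch (the experiment directory and data JSON paths are illustrative, and further required arguments such as --dict are omitted):

    . ./path.sh
    . ./cmd.sh
    ${cuda_cmd} exp/train_demo/train.log \
        asr_train.py --ngpu 1 \
        --config conf/train_transformer.yaml \
        --preprocess-conf conf/specaug.yaml \
        --train-json dump/train/data.json \
        --valid-json dump/dev/data.json \
        --outdir exp/train_demo/results
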
--------------------------------------------------------------------------------
/asr1/conf/train_rnn.yaml:
--------------------------------------------------------------------------------
# network architecture
# encoder related
etype: vggblstmp       # encoder architecture type
elayers: 6
eunits: 320
eprojs: 320
subsample: "1_2_2_1_1" # per-layer frame subsampling factors (each layer keeps every nth frame)
# decoder related
dlayers: 3
dunits: 300
# attention related
atype: location
adim: 320
awin: 5
aheads: 4
aconv-chans: 10
aconv-filts: 100

# hybrid CTC/attention
mtlalpha: 0.2

# label smoothing
lsm-type: unigram
lsm-weight: 0.05

# minibatch related
batch-size: 30
maxlen-in: 800  # if input length > maxlen-in, batchsize is automatically reduced
maxlen-out: 150 # if output length > maxlen-out, batchsize is automatically reduced

# optimization related
sortagrad: 0 # Feed samples from shortest to longest; -1: enabled for all epochs, 0: disabled, N: enabled for the first N epochs
opt: adadelta
epochs: 40
patience: 5

# scheduled sampling option
sampling-probability: 0.0

# Report CER & WER
report-cer: true
report-wer: true

--------------------------------------------------------------------------------
/asr1/conf/train_transformer.yaml:
--------------------------------------------------------------------------------
# network architecture
# encoder related
elayers: 12
eunits: 2048
# decoder related
dlayers: 6
dunits: 2048
# attention related
adim: 256
aheads: 4

# hybrid CTC/attention
mtlalpha: 0.3

# label smoothing
lsm-weight: 0.1

# minibatch related
batch-size: 32
maxlen-in: 512  # if input length > maxlen-in, batchsize is automatically reduced
maxlen-out: 150 # if output length > maxlen-out, batchsize is automatically reduced

# optimization related
sortagrad: 0 # Feed samples from shortest to longest; -1: enabled for all epochs, 0: disabled, N: enabled for the first N epochs
opt: noam
accum-grad: 2
grad-clip: 5
patience: 10
epochs: 160
dropout-rate: 0.1

# transformer specific setting
backend: pytorch
model-module: "espnet.nets.pytorch_backend.e2e_asr_transformer:E2E"
transformer-input-layer: conv2d # type of the input (frontend) layer
transformer-lr: 10.0
transformer-warmup-steps: 25000
transformer-attn-dropout-rate: 0.0
transformer-length-normalized-loss: false
transformer-init: pytorch

# Report CER & WER
report-cer: true
report-wer: true
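
A note on the optimizer settings above: with opt: noam, transformer-lr is a scale factor rather than an absolute learning rate. To the best of my understanding of the ESPnet implementation, the effective rate follows the Noam schedule

    lr(step) = transformer-lr * adim^(-0.5) * min(step^(-0.5), step * warmup^(-1.5))

so the rate warms up roughly linearly for transformer-warmup-steps updates and then decays with the inverse square root of the step count. accum-grad: 2 additionally accumulates gradients over two minibatches per optimizer step, emulating a batch twice as large.
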
--------------------------------------------------------------------------------
/asr1/conf/train_pytorch_conformer_lr5.yaml:
--------------------------------------------------------------------------------
# network architecture
# encoder related
elayers: 12
eunits: 2048
# decoder related
dlayers: 6
dunits: 2048
# attention related
adim: 256
aheads: 4

# hybrid CTC/attention
mtlalpha: 0.2

# label smoothing
lsm-weight: 0.1

# minibatch related
batch-size: 64
maxlen-in: 512  # if input length > maxlen-in, batchsize is automatically reduced
maxlen-out: 150 # if output length > maxlen-out, batchsize is automatically reduced

# optimization related
sortagrad: 0 # Feed samples from shortest to longest; -1: enabled for all epochs, 0: disabled, N: enabled for the first N epochs
opt: noam
accum-grad: 8
grad-clip: 5
patience: 0
epochs: 100
dropout-rate: 0.1

# transformer specific setting
backend: pytorch
model-module: "espnet.nets.pytorch_backend.e2e_asr_conformer:E2E"
transformer-input-layer: conv2d # type of the input (frontend) layer
transformer-lr: 5.0
transformer-warmup-steps: 25000
transformer-attn-dropout-rate: 0.0
transformer-length-normalized-loss: false
transformer-init: pytorch

# conformer specific setting
transformer-encoder-pos-enc-layer-type: rel_pos
transformer-encoder-selfattn-layer-type: rel_selfattn
transformer-encoder-activation-type: swish
macaron-style: true
use-cnn-module: true
cnn-module-kernel: 31

--------------------------------------------------------------------------------
/asr1/cmd.sh:
--------------------------------------------------------------------------------
# ====== About run.pl, queue.pl, slurm.pl, and ssh.pl ======
# Usage: <cmd>.pl [options] JOB=1:<nj> <log> <command...>
# e.g.
# run.pl --mem 4G JOB=1:10 echo.JOB.log echo JOB
#
# Options:
# --time
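
In standard ESPnet recipes, cmd.sh goes on to export the dispatch variables that the run.sh stages use; a typical local-execution default (an assumption about the truncated file, not content shown above) looks like:

    export train_cmd="run.pl --mem 2G"
    export cuda_cmd="run.pl --gpu 1"
    export decode_cmd="run.pl --mem 4G"

With run.pl every job runs on the local machine; swapping in queue.pl or slurm.pl sends the same jobs to an SGE or Slurm cluster without touching the rest of the recipe.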