├── .clang-format ├── .flake8 ├── .gitconfig ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── pull_request_template.md └── stale.yml ├── .gitignore ├── .gitlab-ci.yml ├── .mergify.yml ├── .pip └── pip.conf ├── .pylintrc ├── .style.yapf ├── .travis.yml ├── .vimrc ├── CONTRIBUTING.md ├── LICENSE ├── MAINTAINERS ├── README.md ├── RELEASE.md ├── core ├── __init__.py └── ops │ ├── .gitignore │ ├── Makefile │ ├── __init__.py │ ├── build.sh │ ├── data │ ├── noise │ │ └── babble_16k.pcm │ ├── noiselist.scp │ ├── rir │ │ ├── 0.rir │ │ ├── 1.rir │ │ ├── 2.rir │ │ ├── 3.rir │ │ └── 4.rir │ ├── rirlist.scp │ └── sm1_cln.wav │ ├── gen_build.py │ ├── kernels │ ├── __init__.py │ ├── add_rir_noise_aecres │ │ ├── BaseLib.cpp │ │ ├── BaseLib.h │ │ ├── CAdd_All.cpp │ │ ├── CAdd_All.h │ │ ├── CConv.cpp │ │ ├── CConv.h │ │ ├── CEcho.cpp │ │ ├── CEcho.h │ │ ├── add_rir_noise_aecres.cpp │ │ ├── add_rir_noise_aecres.h │ │ ├── addecho.cpp │ │ ├── addecho.h │ │ ├── audio.cpp │ │ ├── audio.h │ │ ├── conv.cpp │ │ ├── conv.h │ │ └── typedefs_sh.h │ ├── add_rir_noise_aecres_op.cc │ ├── analyfiltbank.cc │ ├── analyfiltbank.h │ ├── analyfiltbank_op.cc │ ├── cepstrum.cc │ ├── cepstrum.h │ ├── cepstrum_op.cc │ ├── complex_defines.h │ ├── delta_delta.cc │ ├── delta_delta.h │ ├── delta_delta_op.cc │ ├── fbank.cc │ ├── fbank.h │ ├── fbank_op.cc │ ├── framepow.cc │ ├── framepow.h │ ├── framepow_op.cc │ ├── jieba_op.cc │ ├── jieba_op_test.py │ ├── mel_spectrum.cc │ ├── mel_spectrum.h │ ├── mel_spectrum_op.cc │ ├── mfcc_dct.cc │ ├── mfcc_dct.h │ ├── mfcc_dct_op.cc │ ├── mfcc_mel_filterbank.cc │ ├── mfcc_mel_filterbank.h │ ├── ngram_op.cc │ ├── ngram_op_test.py │ ├── pitch.cc │ ├── pitch.h │ ├── pitch_op.cc │ ├── plp.cc │ ├── plp.h │ ├── plp_op.cc │ ├── resample.cc │ ├── resample.h │ ├── simple_vocab.cc │ ├── simple_vocab.h │ ├── simple_vocab_op.cc │ ├── simple_vocab_op_test.py │ ├── spectrum.cc │ ├── spectrum.h │ ├── spectrum_op.cc │ ├── speed_op.cc │ ├── string_utils_op.cc 
│ ├── string_utils_test.py │ ├── support_functions.cc │ ├── support_functions.h │ ├── synthfiltbank.cc │ ├── synthfiltbank.h │ ├── synthfiltbank_op.cc │ ├── tokenizer_ops.cc │ ├── tokenizer_ops_test.py │ ├── x_ops.cc │ ├── zcr.cc │ ├── zcr.h │ └── zcr_op.cc │ └── py_x_ops.py ├── delta-serving ├── README.md ├── core │ ├── conf │ │ └── delta_conf.go │ ├── delta_serving.go │ ├── delta_serving_test.go │ ├── handler │ │ ├── delta_model_handler.go │ │ └── delta_predict_handler.go │ ├── model │ │ └── delta_model.go │ ├── pool │ │ ├── delta_dispatcher.go │ │ └── delta_work.go │ ├── types │ │ ├── delta_types.go │ │ └── delta_types_test.go │ └── utils │ │ ├── filepath_util.go │ │ └── filepath_util_test.go ├── examples │ ├── Dockerfile │ ├── build.sh │ ├── main.go │ ├── run.sh │ └── server-env.sh ├── go.mod ├── go.sum └── mock_yaml │ └── nlp │ └── model.yaml ├── delta ├── __init__.py ├── compat.py ├── compat_test.py ├── configs │ ├── atis2_nlu_joint_lstm_crf.yml │ ├── atis_nlu_joint_lstm_crf.yml │ ├── cnn_cls_mock.yml │ ├── cnn_daily_mail_s2s_lstm.yml │ ├── cnn_daily_mail_s2s_transformer.yml │ ├── conll_2003_seq_label_bert.yml │ ├── conll_2003_seq_label_elmo.yml │ ├── conll_2003_seq_label_lstm_crf.yml │ ├── han_cls_mock.yml │ ├── nlu_joint_mock.yml │ ├── rnn_match_mock.yml │ ├── seq_label_mock.yml │ ├── snli_match_rnn.yml │ ├── transformer_s2s_mock.yml │ ├── trec_text_cls_cnn.yml │ └── yahoo_answer_text_cls_han.yml ├── data │ ├── __init__.py │ ├── datasets │ │ ├── __init__.py │ │ ├── atis.py │ │ ├── atis2.py │ │ ├── atis2_test.py │ │ ├── atis_test.py │ │ ├── base_dataset.py │ │ ├── build.py │ │ ├── conll_2003.py │ │ ├── conll_2003_test.py │ │ ├── mock_text_cls_data.py │ │ ├── mock_text_cls_test.py │ │ ├── mock_text_match_data.py │ │ ├── mock_text_match_test.py │ │ ├── mock_text_nlu_joint_data.py │ │ ├── mock_text_nlu_joint_test.py │ │ ├── mock_text_seq2seq_data.py │ │ ├── mock_text_seq2seq_test.py │ │ ├── mock_text_seq_label_data.py │ │ ├── mock_text_seq_label_test.py │ │ ├── 
snli.py │ │ ├── snli_test.py │ │ ├── trec.py │ │ ├── trec_test.py │ │ ├── utils.py │ │ ├── yahoo_answer.py │ │ └── yahoo_answer_test.py │ ├── feat │ │ ├── .gitkeep │ │ ├── __init__.py │ │ ├── python_speech_features │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── english.wav │ │ │ ├── example.py │ │ │ ├── sigproc.py │ │ │ └── test │ │ │ │ ├── __init__.py │ │ │ │ └── test_sigproc.py │ │ ├── speech_feature.py │ │ ├── speech_feature_test.py │ │ ├── speech_ops.py │ │ ├── speech_ops_test.py │ │ ├── tf_speech_feature.py │ │ └── tf_speech_feature_test.py │ ├── frontend │ │ ├── __init__.py │ │ ├── add_noise_end_to_end.py │ │ ├── add_noise_end_to_end_test.py │ │ ├── add_rir_noise_aecres.py │ │ ├── add_rir_noise_aecres_test.py │ │ ├── analyfiltbank.py │ │ ├── analyfiltbank_test.py │ │ ├── base_frontend.py │ │ ├── cepstrum.py │ │ ├── cepstrum_test.py │ │ ├── cmvn.py │ │ ├── delta_delta.py │ │ ├── delta_delta_test.py │ │ ├── fbank.py │ │ ├── fbank_pitch.py │ │ ├── fbank_pitch_test.py │ │ ├── fbank_test.py │ │ ├── framepow.py │ │ ├── framepow_test.py │ │ ├── mel_spectrum.py │ │ ├── mel_spectrum_test.py │ │ ├── mfcc.py │ │ ├── mfcc_test.py │ │ ├── pitch.py │ │ ├── pitch_test.py │ │ ├── plp.py │ │ ├── plp_test.py │ │ ├── read_wav.py │ │ ├── read_wav_test.py │ │ ├── spectrum.py │ │ ├── spectrum_test.py │ │ ├── synthfiltbank.py │ │ ├── synthfiltbank_test.py │ │ ├── write_wav.py │ │ ├── write_wav_test.py │ │ ├── zcr.py │ │ └── zcr_test.py │ ├── preprocess │ │ ├── __init__.py │ │ ├── base_preparer.py │ │ ├── text_cls_preparer.py │ │ ├── text_match_preparer.py │ │ ├── text_nlu_joint_preparer.py │ │ ├── text_ops.py │ │ ├── text_ops_test.py │ │ ├── text_seq2seq_preparer.py │ │ ├── text_seq_label_preparer.py │ │ └── utils.py │ ├── task │ │ ├── __init__.py │ │ ├── asr_seq_task.py │ │ ├── asr_seq_task_test.py │ │ ├── base_speech_task.py │ │ ├── base_task.py │ │ ├── base_text_task.py │ │ ├── kws_cls_task.py │ │ ├── kws_cls_task_test.py │ │ ├── speaker_cls_task.py │ │ ├── 
speaker_cls_task_test.py │ │ ├── speech_cls_task.py │ │ ├── speech_cls_task_test.py │ │ ├── text_cls_task.py │ │ ├── text_cls_task_test.py │ │ ├── text_match_task.py │ │ ├── text_match_task_test.py │ │ ├── text_nlu_joint_task.py │ │ ├── text_nlu_joint_task_test.py │ │ ├── text_seq2seq_task.py │ │ ├── text_seq2seq_task_test.py │ │ ├── text_seq_label_task.py │ │ └── text_seq_label_task_test.py │ └── utils │ │ ├── __init__.py │ │ ├── common_utils.py │ │ ├── common_utils_test.py │ │ ├── espnet_utils.py │ │ ├── espnet_utils_test.py │ │ ├── htk_reader_lib.py │ │ ├── test_utils.py │ │ └── vocabulary.py ├── layers │ ├── __init__.py │ ├── attention.py │ ├── base_layer.py │ ├── common_layers.py │ ├── common_layers_test.py │ ├── dynamic_pooling.py │ ├── match_pyramid.py │ ├── recurrent.py │ ├── resnet.py │ ├── sub_tf.py │ ├── transformer.py │ ├── utils.py │ ├── utils_test.py │ └── utils_tf.py ├── main.py ├── models │ ├── __init__.py │ ├── asr_model.py │ ├── base_model.py │ ├── kws_model.py │ ├── multimodal_cls_model.py │ ├── resnet_model.py │ ├── speaker_cls_rawmodel.py │ ├── speech_cls_model.py │ ├── speech_cls_rawmodel.py │ ├── text_cls_model.py │ ├── text_hierarchical_model.py │ ├── text_match_model.py │ ├── text_nlu_joint_model.py │ ├── text_seq2seq_model.py │ ├── text_seq_label_model.py │ └── text_seq_model.py ├── resources │ └── cppjieba_dict ├── serving │ ├── __init__.py │ ├── base_frozen_model.py │ ├── eval_asr_pb.py │ ├── eval_speech_cls_pb.py │ ├── eval_text_cls_pb.py │ └── knowledge_distilling.py └── utils │ ├── .gitkeep │ ├── __init__.py │ ├── cmvn.py │ ├── cmvn_test.py │ ├── config.py │ ├── config_test.py │ ├── ctc_utils.py │ ├── ctc_utils_test.py │ ├── decode │ ├── .gitkeep │ ├── py_ctc.py │ ├── py_ctc_test.py │ ├── tf_ctc.py │ └── tf_ctc_test.py │ ├── hparam.py │ ├── hparam_test.py │ ├── kaldi │ ├── __init__.py │ ├── kaldi_dir.py │ ├── kaldi_dir_test.py │ └── kaldi_dir_utils.py │ ├── logger.py │ ├── loss │ ├── __init__.py │ ├── base_loss.py │ ├── loss_impl.py │ 
├── loss_utils.py │ └── loss_utils_test.py │ ├── metrics │ ├── __init__.py │ ├── metric_utils.py │ ├── metric_utils_test.py │ ├── py_metrics.py │ ├── py_metrics_test.py │ ├── tf_metrics.py │ └── tf_metrics_test.py │ ├── misc.py │ ├── misc_test.py │ ├── model.py │ ├── optimizer │ ├── __init__.py │ └── yellowfin.py │ ├── plot.py │ ├── postprocess │ ├── __init__.py │ ├── base_postproc.py │ ├── postprocess_utils.py │ ├── postprocess_utils_test.py │ ├── speaker_cls_proc.py │ ├── speech_cls_proc.py │ ├── text_cls_proc.py │ ├── text_seq2seq_proc.py │ └── text_seq_label_proc.py │ ├── register.py │ ├── solver │ ├── __init__.py │ ├── asr_solver.py │ ├── base_solver.py │ ├── emotion_solver.py │ ├── estimator_solver.py │ ├── keras_base_solver.py │ ├── keras_solver.py │ ├── kws_solver.py │ ├── raw_cls_solver.py │ ├── raw_cls_solver_test.py │ ├── raw_match_solver.py │ ├── raw_match_solver_test.py │ ├── raw_nlu_joint_solver.py │ ├── raw_nlu_joint_solver_test.py │ ├── raw_pretrain_cls_solver.py │ ├── raw_pretrain_seq_label_solver.py │ ├── raw_seq2seq_solver.py │ ├── raw_seq2seq_solver_test.py │ ├── raw_seq_label_solver.py │ ├── raw_seq_label_solver_test.py │ ├── raw_solver.py │ ├── speaker_solver.py │ └── utils │ │ ├── __init__.py │ │ ├── callbacks.py │ │ ├── hooks.py │ │ ├── solver_utils.py │ │ └── solver_utils_test.py │ ├── summary.py │ └── textgrid │ ├── gen_segments.sh │ ├── generate_segment_from_textgrid.py │ ├── split_pcm_by_text_grid.py │ └── textgrid.py ├── deltann ├── Makefile ├── README.md ├── api │ ├── c_api.cc │ └── c_api.h ├── build.sh ├── core │ ├── base_model.cc │ ├── base_model.h │ ├── buffer.h │ ├── config.cc │ ├── config.h │ ├── graph.cc │ ├── graph.h │ ├── io.h │ ├── logging.h │ ├── misc.cc │ ├── misc.h │ ├── runtime.cc │ ├── runtime.h │ ├── shape.cc │ ├── shape.h │ ├── shape_test.cc │ ├── tflite_model.cc │ ├── tflite_model.h │ ├── tfmodel.cc │ ├── tfmodel.h │ ├── tfserving_model.cc │ ├── tfserving_model.h │ └── utils │ │ ├── .gitkeep │ │ ├── curl_client.cc │ │ 
├── curl_client.h │ │ ├── dynload │ │ ├── dynamic_loader.cc │ │ ├── dynamic_loader.h │ │ ├── feature_extraction_wrapper.cc │ │ └── feature_extraction_wrapper.h │ │ ├── https.cc │ │ ├── https.h │ │ ├── https_client.cc │ │ └── https_client.h ├── deltann_version_script.lds ├── examples │ ├── .gitkeep │ ├── Makefile │ ├── speaker │ │ ├── Makefile │ │ ├── model.yaml │ │ └── test.cc │ ├── text_cls │ │ ├── Makefile │ │ ├── model.yaml │ │ └── test.cc │ └── text_conf_json │ │ ├── Makefile │ │ ├── model.yaml │ │ └── test_conf_json.cc ├── run.sh ├── targets │ ├── android_makefile.inc │ ├── ios_makefile.inc │ └── linux_makefile.inc ├── test.sh └── test │ └── test.cc ├── docker ├── build.sh ├── dockerfile.ci.cpu ├── dockerfile.delta.cpu ├── dockerfile.delta.gpu ├── dockerfile.deltann.cpu ├── dockerfile.deltann.gpu ├── gen_dockerfile.sh ├── install.sh ├── install_user.sh ├── requirements.txt └── sources.list.ubuntu18.04 ├── docs ├── Makefile ├── _static │ ├── .gitkeep │ └── delta_logo.png ├── _templates │ └── .gitkeep ├── conf.py ├── development │ ├── adding_op.md │ ├── contributing.md │ ├── deltann_compile.md │ ├── docker.md │ ├── model_optimization.md │ ├── serving.md │ └── tensorrt.md ├── faq.md ├── index.rst ├── installation │ ├── install_from_source.md │ ├── install_on_macos.md │ ├── manual_setup.md │ ├── pick_installation.md │ ├── using_docker.md │ └── wheel_build.md ├── introduction.md ├── references.md ├── released_models.md ├── tutorials │ ├── deployment │ │ ├── .gitkeep │ │ ├── deltann.md │ │ └── dpl.md │ └── training │ │ ├── data │ │ ├── asr_example.md │ │ ├── emotion-specch-cls.md │ │ └── kws-cls.md │ │ ├── egs.md │ │ ├── imags │ │ ├── log_spectrum_compare.png │ │ ├── mfcc_compare.png │ │ ├── pitch_compare.png │ │ ├── plp_compare.png │ │ └── speech_features.png │ │ ├── speech_features.md │ │ ├── text_class_pip_example.md │ │ └── text_class_source_example.md └── version.md ├── dpl ├── README.md ├── gadapter │ ├── .gitignore │ ├── README.md │ ├── run.sh │ ├── 
saved_model │ │ └── .gitkeep │ ├── tfgraph │ │ └── .gitkeep │ ├── tflite │ │ └── .gitkeep │ └── tftrt │ │ └── .gitkeep ├── lib │ ├── custom_ops │ │ └── .gitkeep │ ├── deltann │ │ └── .gitkeep │ ├── tensorflow │ │ └── .gitkeep │ └── tflite │ │ └── .gitkeep ├── model │ ├── model.yaml │ └── saved_model │ │ ├── .gitignore │ │ └── .gitkeep └── run.sh ├── egs ├── README.md ├── atis │ ├── README.md │ └── nlu-joint │ │ └── v1 │ │ ├── config │ │ └── nlu_joint.yml │ │ ├── local │ │ └── summary_data.py │ │ └── run.sh ├── atis2 │ └── nlu_joint │ │ └── v1 │ │ ├── config │ │ └── nlu_joint.yml │ │ ├── local │ │ └── generate_standard_format.py │ │ └── run.sh ├── cnn_dailymail │ └── seq2seq │ │ └── v1 │ │ ├── config │ │ ├── lstm_s2s.yml │ │ └── transformer-s2s.yml │ │ ├── local │ │ └── make_datafiles.py │ │ ├── run.sh │ │ └── utils ├── conll2003 │ ├── README.md │ ├── pretrain │ │ └── v1 │ │ │ ├── README.md │ │ │ ├── config │ │ │ ├── seq_label_bert.yml │ │ │ └── seq_label_elmo.yml │ │ │ ├── local │ │ │ ├── generate_bert_vocab.py │ │ │ ├── generate_elmo_vocab.py │ │ │ ├── modeling.py │ │ │ ├── preprocess_bert_dataset.py │ │ │ ├── preprocess_elmo_dataset.py │ │ │ ├── tokenization.py │ │ │ ├── transfer_bert_model.py │ │ │ └── transfer_elmo_model.py │ │ │ ├── run_bert.sh │ │ │ └── run_elmo.sh │ └── seq_label │ │ └── v1 │ │ ├── config │ │ └── seq-label.yml │ │ ├── local │ │ └── change_data_format.py │ │ ├── run.sh │ │ └── utils ├── hkust │ └── asr │ │ └── v1 │ │ ├── .gitignore │ │ ├── README.md │ │ ├── cmd.sh │ │ ├── conf │ │ ├── asr-ctc.yml │ │ ├── cmu2pinyin │ │ ├── fbank.conf │ │ ├── gpu.conf │ │ ├── pinyin2cmu │ │ ├── pitch.conf │ │ ├── queue.conf │ │ └── slurm.conf │ │ ├── local │ │ ├── create_oov_char_lexicon.pl │ │ ├── hkust_data_prep.sh │ │ ├── hkust_format_data.sh │ │ ├── hkust_normalize.pl │ │ ├── hkust_prepare_char_dict.sh │ │ ├── hkust_prepare_dict.sh │ │ ├── hkust_segment.py │ │ └── hkust_train_lms.sh │ │ ├── path.sh │ │ ├── run.sh │ │ ├── run_delta.sh │ │ ├── steps │ │ └── 
utils ├── iemocap │ ├── README.md │ └── emo │ │ └── v1 │ │ ├── README.md │ │ ├── conf │ │ ├── emo-keras-blstm.yml │ │ ├── emo-keras-resnet50.yml │ │ ├── emotion-speech-cls.yml │ │ └── multimodal-align-emotion.yml │ │ ├── local │ │ └── python │ │ │ ├── compute_cmvn.py │ │ │ ├── dump_all_data.py │ │ │ ├── dump_data_from_pickle.py │ │ │ ├── generate_vocab.py │ │ │ ├── helper.py │ │ │ ├── inspect_feature.py │ │ │ └── mocap_data_collect.py │ │ ├── path.sh │ │ ├── run.sh │ │ ├── run_mm.sh │ │ └── utils ├── mini_an4 │ ├── README.md │ └── asr │ │ └── v1 │ │ ├── .gitignore │ │ ├── cmd.sh │ │ ├── conf │ │ ├── asr-ctc.yml │ │ ├── decode.yaml │ │ ├── fbank.conf │ │ ├── gpu.conf │ │ ├── lm.yaml │ │ ├── pitch.conf │ │ ├── preprocess.yaml │ │ ├── queue.conf │ │ ├── slurm.conf │ │ └── train.yaml │ │ ├── downloads.tar.gz │ │ ├── dutils │ │ ├── local │ │ └── data_prep.py │ │ ├── path.sh │ │ ├── run.sh │ │ ├── run_delta.sh │ │ ├── speech │ │ ├── steps │ │ └── utils ├── mock_text_cls_data │ └── text_cls │ │ └── v1 │ │ ├── config │ │ ├── cnn-cls.yml │ │ └── han-cls.yml │ │ ├── local │ │ └── generate_mock_data.py │ │ ├── run.sh │ │ └── utils ├── mock_text_match_data │ └── text_match │ │ └── v1 │ │ ├── config │ │ ├── pyramid-match-mock.yml │ │ └── rnn-match-mock.yml │ │ ├── local │ │ └── generate_mock_data.py │ │ ├── run.sh │ │ └── utils ├── mock_text_nlu_joint_data │ └── nlu-joint │ │ └── v1 │ │ ├── config │ │ └── nlu_joint.yml │ │ ├── local │ │ └── generate_mock_data.py │ │ ├── run.sh │ │ └── utils ├── mock_text_seq2seq_data │ └── seq2seq │ │ └── v1 │ │ ├── config │ │ └── transformer-s2s.yml │ │ ├── local │ │ └── generate_mock_data.py │ │ ├── run.sh │ │ └── utils ├── mock_text_seq_label_data │ └── seq-label │ │ └── v1 │ │ ├── config │ │ └── seq-label-mock.yml │ │ ├── local │ │ └── generate_mock_data.py │ │ ├── run.sh │ │ └── utils ├── msra_ner │ ├── README.md │ └── seq_label │ │ └── v1 │ │ ├── config │ │ └── seq-label.yml │ │ ├── local │ │ └── change_data_format.py │ │ ├── run.sh │ │ 
└── utils ├── quora_qp │ ├── README.md │ └── match │ │ └── v1 │ │ ├── config │ │ └── rnn-match.yml │ │ ├── local │ │ ├── generate_standard_format.py │ │ └── load_data.py │ │ ├── run.sh │ │ └── utils ├── snli │ ├── README.md │ └── match │ │ └── v1 │ │ ├── config │ │ └── rnn-match.yml │ │ ├── local │ │ └── generate_standard_format.py │ │ ├── run.sh │ │ └── utils ├── sre16 │ └── v1 │ │ ├── README.md │ │ ├── cmd.sh │ │ ├── conf │ │ ├── mfcc.conf │ │ └── vad.conf │ │ ├── local │ │ ├── make_musan.py │ │ ├── make_musan.sh │ │ ├── make_mx6.sh │ │ ├── make_mx6_BUT.sh │ │ ├── make_mx6_calls.pl │ │ ├── make_mx6_calls_BUT.pl │ │ ├── make_mx6_mic.pl │ │ ├── make_mx6_mic_BUT.pl │ │ ├── make_sre.pl │ │ ├── make_sre.sh │ │ ├── make_sre08.pl │ │ ├── make_sre08_BUT.pl │ │ ├── make_sre10.pl │ │ ├── make_sre16_eval.pl │ │ ├── make_sre16_eval_BUT.pl │ │ ├── make_sre16_unlabeled.pl │ │ ├── make_sre18_dev.py │ │ ├── make_sre18_eval.py │ │ ├── make_sre_BUT.sh │ │ ├── make_swbd2_phase1.pl │ │ ├── make_swbd2_phase1_BUT.pl │ │ ├── make_swbd2_phase2.pl │ │ ├── make_swbd2_phase2_BUT.pl │ │ ├── make_swbd2_phase3.pl │ │ ├── make_swbd2_phase3_BUT.pl │ │ ├── make_swbd_cellular1.pl │ │ ├── make_swbd_cellular1_BUT.pl │ │ ├── make_swbd_cellular2.pl │ │ └── make_swbd_cellular2_BUT.pl │ │ ├── path.sh │ │ ├── run.sh │ │ ├── sid │ │ ├── steps │ │ └── utils ├── trec │ ├── README.md │ └── text_cls │ │ └── v1 │ │ ├── config │ │ └── cnn-cls.yml │ │ ├── local │ │ └── change_data_format.py │ │ └── run.sh ├── voxceleb │ ├── README.md │ └── spk │ │ └── v1 │ │ ├── .gitignore │ │ ├── README.md │ │ ├── cmd.sh │ │ ├── conf │ │ ├── mfcc.conf │ │ ├── tdnn_arcface.yml │ │ ├── tdnn_softmax.yml │ │ └── vad.conf │ │ ├── local │ │ ├── path.sh │ │ ├── run.sh │ │ ├── run_delta.sh │ │ ├── sid │ │ ├── steps │ │ └── utils ├── wmt14_en_de │ └── nlp1 │ │ ├── config │ │ ├── lstm_s2s.yml │ │ └── transformer-s2s.yml │ │ ├── local │ │ └── generate_stand_vocab.py │ │ └── run.sh └── yahoo_answer │ ├── README.md │ └── text_cls │ └── v1 
│ ├── config │ └── han-cls.yml │ ├── local │ └── generate_standard_format.py │ ├── run.sh │ └── utils ├── env.sh ├── gcompiler ├── .gitginore ├── CMakeLists.txt ├── README.md ├── cmake │ ├── build.cmake │ ├── cuda.cmake │ ├── external │ │ └── pybind11.cmake │ ├── tensorflow.cmake │ └── utils.cmake ├── delta_infer │ ├── CMakeLists.txt │ ├── core │ │ ├── CMakeLists.txt │ │ ├── algorithm.h │ │ ├── config.h │ │ ├── debug.h │ │ ├── pattern.cc │ │ ├── pattern.h │ │ ├── scatter_search.cc │ │ └── scatter_search.h │ ├── custom_grappler │ │ ├── CMakeLists.txt │ │ ├── auto_fusion.cc │ │ ├── auto_fusion.h │ │ ├── basic_grappler_optimizer.cc │ │ └── local_optimizer.h │ ├── custom_ops │ │ ├── CMakeLists.txt │ │ ├── alloc.h │ │ ├── ops_utils.h │ │ ├── platform │ │ │ ├── CUDA │ │ │ │ ├── cuda_checks.h │ │ │ │ ├── kernels.h │ │ │ │ ├── kernels │ │ │ │ │ ├── attention.cu │ │ │ │ │ └── feed_forward.cu │ │ │ │ ├── transformer_functor_cu.cc │ │ │ │ └── transformer_functor_nlp_cu.cc │ │ │ └── X86 │ │ │ │ ├── transformer_functor.cc │ │ │ │ └── transformer_nlp_functor.cc │ │ ├── transformer_cell.cc │ │ ├── transformer_cell_bert.cc │ │ ├── transformer_cell_functor.h │ │ ├── transformer_cell_nlp.cc │ │ └── transformer_cell_nlp_functor.h │ ├── scheduler │ │ └── CMakeLists.txt │ └── test │ │ ├── CMakeLists.txt │ │ ├── auto_fusion_test.cc │ │ ├── basic_test.cc │ │ └── subgraphs │ │ └── TansformerCell.pb ├── docs │ ├── customops.md │ └── subgraphs.md ├── example │ ├── c++ │ │ ├── CMakeLists.txt │ │ ├── basic │ │ │ ├── perf.cc │ │ │ └── perf.h │ │ ├── benchmark.cc │ │ ├── gemm_perf.cc │ │ └── perf_test.cc │ └── python │ │ ├── complex_transformer.py │ │ ├── nlp_transformer │ │ ├── __init__.py │ │ └── model.py │ │ ├── simple_bert_transformer.py │ │ ├── simple_transformer.py │ │ ├── standard_transformer │ │ ├── __init__.py │ │ ├── model.py │ │ └── train.py │ │ ├── tts_transformer.py │ │ └── tts_transformer │ │ ├── __init__.py │ │ └── model.py ├── install │ ├── .bazelrc │ ├── bench.sh │ ├── 
build_gpu.sh │ ├── install.md │ └── viewgraph.sh ├── python │ ├── .gitignore │ ├── CMakeLists.txt │ ├── README.md │ ├── delta_infer │ │ ├── __init__.py │ │ ├── cpp │ │ │ ├── __init__.py │ │ │ ├── delta_cpp_export_py.cc │ │ │ └── export_utils.h │ │ ├── optimizer.py │ │ ├── subgraphs │ │ │ ├── __init__.py │ │ │ ├── common │ │ │ │ ├── __init__.py │ │ │ │ ├── generator.py │ │ │ │ └── summarize_graph.py │ │ │ └── transformer │ │ │ │ ├── __init__.py │ │ │ │ ├── model.py │ │ │ │ └── transformer.py │ │ └── visual_pattern.py │ └── setup.py └── third_party │ └── .gitkeep ├── setup.py ├── tools ├── .gitignore ├── Makefile ├── format.sh ├── install │ ├── build_pip_pkg.sh │ ├── build_tf_pip_pkg.sh │ ├── check_install.py │ ├── install-delta.sh │ ├── install-deltann.sh │ ├── install-go.sh │ ├── install-ops.sh │ ├── prepare_kaldi.sh │ └── sph2pipe.patch ├── license │ ├── LICENSE_cpp │ ├── LICENSE_py │ └── add_license.sh ├── plugins │ ├── README.rst │ ├── pre-commit.sh │ └── vim │ │ ├── autoload │ │ └── yapf.vim │ │ └── plugin │ │ └── yapf.vim ├── release_notes.py ├── requirements.txt ├── test │ ├── .gitignore │ ├── Makefile │ ├── cpp_test.sh │ ├── gen_mock_egs_data.sh │ ├── integration_test.sh │ ├── lint.sh │ ├── python_test.sh │ ├── test_main.cc │ └── tf_env_collect.sh └── valgrind.sh └── utils ├── avg_checkpoints.py ├── deploy ├── benchmark_model.sh ├── convert_frozen_pb_to_tftrt.py ├── convert_frozen_pb_to_tftrt.sh ├── dot.sh ├── print_selective_registration_header.sh ├── saved_model_cli.sh ├── summarize_graph.sh ├── tfconvert_frozen.sh ├── tflite_convert.py ├── tflite_convert_float_noquant.sh ├── tflite_convert_float_quant.sh ├── tflite_convert_int8_noquant.sh ├── tflite_convert_int8_quant.sh ├── tflite_run.py └── transformer_graph.sh ├── dump.sh ├── espnet_utils ├── frozen_saved_model.sh ├── inspect_saved_model.sh ├── nlp ├── scale.py └── split_train_dev.py ├── parse_options.sh ├── pb_pbtxt.py ├── replace_custom_op_attr_pbtxt.py ├── replace_custom_op_attr_pbtxt_test.py ├── 
run_saved_model.py ├── speech ├── align_ctc_single_utt.sh ├── apply_cmvn.py ├── compute_cmvn_stats.py ├── compute_fbank_feats.py ├── compute_fbank_pitch.py ├── compute_mfcc_feats.py ├── compute_pitch_feats.py ├── compute_plp_feats.py ├── compute_spectrum_feats.py ├── compute_stft_feats.py ├── copy_feats.py ├── ctc_compile_dict_token.sh ├── ctc_token_fst.py ├── decode_ctc.sh ├── decode_ctc_lat.sh ├── make_fbank.sh ├── make_fbank_pitch.sh ├── make_mfcc.sh ├── make_plp.sh ├── make_spectrum.sh ├── make_stft.sh ├── prep_ctc_trans.py └── train_ctc_parallel.sh └── subset_data_dir_tr_cv.py /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: Google 2 | -------------------------------------------------------------------------------- /.gitconfig: -------------------------------------------------------------------------------- 1 | [push] 2 | default = matching 3 | [alias] 4 | ci = commit 5 | br = branch 6 | co = checkout 7 | st = status 8 | [color] 9 | st = auto 10 | diff = auto 11 | ui = true 12 | [credential] 13 | helper = cache --timeout 30000 14 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. 
iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | **Thank you for submitting a pull request! Please provide the following information for code review:** 2 | 3 | # Pull Request Summary 4 | [TODO] Please provide a summary for your pull request or mention(#) a related issue number. 5 | 6 | # Test Plan 7 | [TODO] Please provide how to test your code or show validity of the code: i.e., command lines, screen shots, test results, ... 
8 | -------------------------------------------------------------------------------- /.github/stale.yml: -------------------------------------------------------------------------------- 1 | # Number of days of inactivity before an issue becomes stale 2 | daysUntilStale: 45 3 | # Number of days of inactivity before a stale issue is closed 4 | daysUntilClose: 30 5 | # Issues with these labels will never be considered stale 6 | exemptLabels: 7 | - Roadmap 8 | - Bug 9 | - New Feature 10 | # Label to use when marking an issue as stale 11 | staleLabel: Stale 12 | # Comment to post when marking an issue as stale. Set to `false` to disable 13 | markComment: > 14 | This issue has been automatically marked as stale because it has not had 15 | recent activity. It will be closed if no further activity occurs. Thank you 16 | for your contributions. 17 | unmarkComment: false 18 | # Comment to post when closing a stale issue. Set to `false` to disable 19 | closeComment: > 20 | This issue is closed. Please re-open if needed. 
21 | -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | image: 2 | name: "zh794390558/delta:1.14.0-ci-cpu-py3" 3 | 4 | before_script: 5 | - echo "CI_PROJECT_DIR is ${CI_PROJECT_DIR}" 6 | - whoami 7 | - cat /etc/lsb-release 8 | - env 9 | 10 | stages: 11 | - build 12 | - test 13 | 14 | lint_and_unit_test: 15 | stage: test 16 | tags: 17 | - "delta-ci-runner" 18 | only: 19 | - "master" 20 | script: 21 | - env 22 | - cd ${CI_PROJECT_DIR} && source env.sh 23 | - pushd ${CI_PROJECT_DIR}/tools && make basic check_install test && popd 24 | - bash tools/test/python_test.sh 25 | - bash tools/test/cpp_test.sh 26 | - bash tools/test/lint.sh 27 | -------------------------------------------------------------------------------- /.mergify.yml: -------------------------------------------------------------------------------- 1 | pull_request_rules: 2 | - name: automatic merge for master when CI passes and 1 reviews 3 | conditions: 4 | - "#approved-reviews-by>=1" 5 | - check-success=Travis CI - Pull Request 6 | - base=master 7 | actions: 8 | merge: 9 | method: merge -------------------------------------------------------------------------------- /.pip/pip.conf: -------------------------------------------------------------------------------- 1 | [global] 2 | timeout = 60000 3 | index-url = https://mirrors.ustc.edu.cn/pypi/web/simple 4 | extra-index-url = https://mirrors.aliyun.com/pypi/simple/ 5 | format = columns 6 | -------------------------------------------------------------------------------- /.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | based_on_style = yapf 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | services: 4 | - docker 
5 | 6 | before_install: 7 | - export DELTA_PATH=`pwd`; echo $DELTA_PATH 8 | - export DOCKER_DELTA="/home/gitlab-runner/delta"; echo $DOCKER_DELTA 9 | - export CI_IMAGE=zh794390558/delta:2.3.0-ci-cpu-py3 10 | - docker pull ${CI_IMAGE} 11 | - docker run -it -d --name travis_con --user root -v ${DELTA_PATH}:${DOCKER_DELTA} ${CI_IMAGE} bash 12 | - docker exec travis_con bash -c "gcc -v && g++ -v" 13 | - docker exec travis_con bash -c "cd ${DOCKER_DELTA}; source env.sh" 14 | #- docker exec travis_con bash -c "cd ${DOCKER_DELTA}/tools; make basic check_install test" 15 | - docker exec travis_con bash -c "cd ${DOCKER_DELTA}/tools; make basic check_install" 16 | - docker exec travis_con bash -c "cd ${DOCKER_DELTA}/tools/install; bash prepare_kaldi.sh" 17 | 18 | jobs: 19 | include: 20 | - stage: test 21 | script: docker exec travis_con bash -c "cd ${DOCKER_DELTA}; bash tools/test/python_test.sh" 22 | - script: docker exec travis_con bash -c "cd ${DOCKER_DELTA}; bash tools/test/lint.sh" 23 | #- script: docker exec travis_con bash -c "cd ${DOCKER_DELTA}; bash tools/test/cpp_test.sh" 24 | -------------------------------------------------------------------------------- /.vimrc: -------------------------------------------------------------------------------- 1 | " Force indentation styles for this directory 2 | autocmd FileType python set shiftwidth=2 3 | autocmd FileType python set tabstop=2 4 | autocmd FileType python set softtabstop=2 5 | set number 6 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contribution Guideline 2 | 3 | Thanks for considering to contribute this project. All issues and pull requests are highly appreciated. 4 | 5 | ## Pull Requests 6 | 7 | Before sending pull request to this project, please read and follow guidelines below. 8 | 9 | 1. Branch: We accept pull request on `master` branch. 10 | 2. 
Coding style: Follow the coding style used in VirtualAPK. 11 | 3. Commit message: Use English and be aware of your spell. 12 | 4. Test: Make sure to test your code. 13 | 14 | Add device mode, API version, related log, screenshots and other related information in your pull request if possible. 15 | 16 | NOTE: We assume all your contribution can be licensed under the [Apache License 2.0](https://github.com/didi/delta/blob/master/LICENSE). 17 | 18 | ## Issues 19 | 20 | We love clearly described issues. :) 21 | 22 | Following information can help us to resolve the issue faster. 23 | 24 | * Device mode and hardware information. 25 | * API version. 26 | * Logs. 27 | * Screenshots. 28 | * Steps to reproduce the issue. 29 | 30 | ## Coding Styles 31 | Please follow the coding styles [here](https://github.com/didi/delta/blob/master/docs/development/contributing.md) 32 | -------------------------------------------------------------------------------- /MAINTAINERS: -------------------------------------------------------------------------------- 1 | Hui Zhang 2 | Chengyun Deng 3 | -------------------------------------------------------------------------------- /RELEASE.md: -------------------------------------------------------------------------------- 1 | # Delta release management 2 | 3 | This describes the process by which versions of Delta are officially released to the public. 4 | 5 | 6 | ## Release process 7 | 8 | Releases and release notes are published to [github](https://github.com/Delta-ML/delta/releases/). 9 | Documentation for is published to [readthedocs](https://delta-didi.readthedocs.io/en/latest/). 10 | Release builds are published to [pypi](https://pypi.org/project/delta-nlp/). 11 | 12 | -------------------------------------------------------------------------------- /core/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 
2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | 17 | 18 | -------------------------------------------------------------------------------- /core/ops/.gitignore: -------------------------------------------------------------------------------- 1 | gen/ 2 | cppjieba 3 | *.so 4 | !data/sm1_cln.wav 5 | *.scp 6 | !noiselist.scp 7 | !rirlist.scp 8 | !noise 9 | !rir 10 | -------------------------------------------------------------------------------- /core/ops/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ============================================================================== 16 | import os 17 | 18 | PACKAGE_OPS_DIR = os.path.dirname(os.path.abspath(__file__)) 19 | -------------------------------------------------------------------------------- /core/ops/data/noise/babble_16k.pcm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/core/ops/data/noise/babble_16k.pcm -------------------------------------------------------------------------------- /core/ops/data/noiselist.scp: -------------------------------------------------------------------------------- 1 | noise/babble_16k.pcm 2 | -------------------------------------------------------------------------------- /core/ops/data/rir/0.rir: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/core/ops/data/rir/0.rir -------------------------------------------------------------------------------- /core/ops/data/rir/1.rir: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/core/ops/data/rir/1.rir -------------------------------------------------------------------------------- /core/ops/data/rir/2.rir: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/core/ops/data/rir/2.rir -------------------------------------------------------------------------------- /core/ops/data/rir/3.rir: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/core/ops/data/rir/3.rir 
-------------------------------------------------------------------------------- /core/ops/data/rir/4.rir: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/core/ops/data/rir/4.rir -------------------------------------------------------------------------------- /core/ops/data/rirlist.scp: -------------------------------------------------------------------------------- 1 | rir/0.rir 2 | rir/1.rir 3 | rir/2.rir 4 | rir/3.rir 5 | rir/4.rir 6 | -------------------------------------------------------------------------------- /core/ops/data/sm1_cln.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/core/ops/data/sm1_cln.wav -------------------------------------------------------------------------------- /core/ops/kernels/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ============================================================================== 16 | -------------------------------------------------------------------------------- /core/ops/kernels/add_rir_noise_aecres/BaseLib.h: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | All rights reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | ==============================================================================*/ 16 | 17 | #ifndef __BASELIB_H_ 18 | #define __BASELIB_H_ 19 | 20 | #include "typedefs_sh.h" 21 | 22 | void FFT(COMPLEX *pFFTData, int nFFTOrder); 23 | 24 | #endif //__BASELIB_H_ 25 | 26 | /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ 27 | -------------------------------------------------------------------------------- /core/ops/kernels/add_rir_noise_aecres/CAdd_All.cpp: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | All rights reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | ==============================================================================*/ 16 | 17 | #include "CAdd_All.h" 18 | #include "conv.h" 19 | 20 | CAdd_All::CAdd_All() { st_rir = conv_init(16000, 0); } 21 | 22 | CAdd_All::~CAdd_All() { conv_exit(st_rir); } 23 | 24 | int CAdd_All::add_rir(void* st, short* inputdata, int inputdata_length, 25 | short* outputdata, int* outputdata_size, char* filelist) { 26 | int ret; 27 | ret = conv_process(st, inputdata, inputdata_length, outputdata, 28 | outputdata_size, filelist); 29 | return ret; 30 | } 31 | -------------------------------------------------------------------------------- /core/ops/kernels/add_rir_noise_aecres/CAdd_All.h: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | All rights reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ==============================================================================*/ 16 | 17 | #ifndef __CADD_ALL_H_ 18 | #define __CADD_ALL_H_ 19 | 20 | class CAdd_All { 21 | private: 22 | public: 23 | CAdd_All(); 24 | ~CAdd_All(); 25 | 26 | int add_rir(void* st, short* inputdata, int inputdata_length, 27 | short* outputdata, int* outputdata_size, char* filelist); 28 | 29 | public: 30 | void* st_rir; 31 | void* st_noise; 32 | }; 33 | 34 | #endif //__CADD_ALL_H_ 35 | -------------------------------------------------------------------------------- /core/ops/kernels/add_rir_noise_aecres/CConv.h: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | All rights reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ==============================================================================*/ 16 | 17 | #ifndef __CCONV_H_ 18 | #define __CCONV_H_ 19 | #include 20 | #include 21 | #include "BaseLib.h" 22 | #include "typedefs_sh.h" 23 | #define RIR_LENGTH 16000 24 | 25 | class CConv { 26 | private: 27 | public: 28 | CConv(int normflag); 29 | CConv(); 30 | ~CConv(); 31 | 32 | void* apm_handle; 33 | short* inputdata; 34 | short* bufferdata; 35 | int buffer_len; 36 | int frm_len; 37 | int data_len; 38 | float peakthld; 39 | unsigned int enableflag; 40 | 41 | double* H; 42 | int ConvProcess(short* pOrigInputData, long lDataLength, double* ppRIR, 43 | long lRIRLength, short* pOutputData); 44 | int SelectH(char* rir_list); 45 | 46 | int normflag; 47 | }; 48 | 49 | #endif //__CCONV_H_ 50 | -------------------------------------------------------------------------------- /core/ops/kernels/add_rir_noise_aecres/CEcho.cpp: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | All rights reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ==============================================================================*/ 16 | 17 | #include "CEcho.h" 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | CEcho::CEcho(int f, float echo_snr_min, float echo_snr_max, float echo_ratio) { 24 | ; 25 | } 26 | 27 | CEcho::~CEcho() { ; } 28 | 29 | int CEcho::process(short* inputdata, int inputdata_length, short* outputdata, 30 | int* outputdata_size, char* filelist) { 31 | if (inputdata == NULL || outputdata == NULL || outputdata_size == NULL) { 32 | return -1; 33 | } 34 | if (inputdata_length < 0) { 35 | return -2; 36 | } 37 | 38 | return 0; 39 | } 40 | -------------------------------------------------------------------------------- /core/ops/kernels/add_rir_noise_aecres/CEcho.h: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | All rights reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ==============================================================================*/ 16 | 17 | #ifndef __CECHO_H_ 18 | #define __CECHO_H_ 19 | #include 20 | #include 21 | 22 | class CEcho { 23 | private: 24 | public: 25 | CEcho(int f, float echo_snr_min, float echo_snr_max, float echo_ratio); 26 | ~CEcho(); 27 | 28 | int process(short* inputdata, int inputdata_length, short* outputdata, 29 | int* outputdata_size, char* filelist); 30 | 31 | public: 32 | int nFs; 33 | int ahead; 34 | int tail; 35 | float snr_min; 36 | float snr_max; 37 | float snr_ratio; 38 | }; 39 | 40 | #endif //__CECHO_H_ 41 | -------------------------------------------------------------------------------- /core/ops/kernels/add_rir_noise_aecres/addecho.h: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | All rights reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ==============================================================================*/ 16 | 17 | #ifndef __ADD_ECHO_H_ 18 | #define __ADD_ECHO_H_ 19 | 20 | #ifdef __cplusplus 21 | extern "C" { 22 | #endif 23 | 24 | void* add_echo_init(int nFs, float echo_snr_min, float echo_snr_max, 25 | float echo_ratio); 26 | 27 | int add_echo_process(void* st, short* inputdata, int inputdata_length, 28 | short* outputdata, int* outputdata_size, char* filelist); 29 | 30 | void add_echo_exit(void* st); 31 | 32 | #ifdef __cplusplus 33 | } 34 | #endif 35 | #endif //__ADD_ECHO_H_ 36 | -------------------------------------------------------------------------------- /core/ops/kernels/add_rir_noise_aecres/audio.h: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | All rights reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ==============================================================================*/ 16 | 17 | #ifndef AUDIO_H_ 18 | #define AUDIO_H_ 19 | 20 | #include "add_rir_noise_aecres.h" 21 | #include "tensorflow/core/platform/logging.h" 22 | using namespace tensorflow; 23 | 24 | class audio { 25 | private: 26 | void* st; 27 | 28 | public: 29 | audio(int nFs); 30 | ~audio(); 31 | 32 | int audio_pre_proc(short* inputdata, int inputdata_length, short* outputdata, 33 | int* outputdata_size, bool if_add_rir, char* rir_filelist, 34 | bool if_add_noise, char* noise_filelist, float snr_min, 35 | float snr_max, bool if_add_aecres, char* aecres_filelist); 36 | }; 37 | 38 | #endif // AUDIO_H_ 39 | -------------------------------------------------------------------------------- /core/ops/kernels/add_rir_noise_aecres/conv.h: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | All rights reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ==============================================================================*/ 16 | 17 | #ifndef __CONV_H_ 18 | #define __CONV_H_ 19 | 20 | #ifdef __cplusplus 21 | extern "C" { 22 | #endif 23 | 24 | void* conv_init(int nFs, int normflag); 25 | 26 | int conv_process(void* st, short* inputdata, int inputdata_length, 27 | short* outputdata, int* outputdata_size, char* rir_list); 28 | 29 | void conv_exit(void* st); 30 | 31 | #ifdef __cplusplus 32 | } 33 | #endif 34 | #endif //__CONV_H_ 35 | -------------------------------------------------------------------------------- /core/ops/kernels/string_utils_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ============================================================================== 16 | ''' string utils op unittest''' 17 | import delta.compat as tf 18 | 19 | from delta.layers.ops import py_x_ops 20 | 21 | 22 | class StringUtilsOpTest(tf.test.TestCase): 23 | ''' string utils test''' 24 | 25 | def setUp(self): 26 | super().setUp() 27 | 28 | def tearDown(self): 29 | ''' tear down ''' 30 | 31 | def test_lower(self): 32 | ''' test lower string''' 33 | with self.cached_session(use_gpu=False, force_gpu=False): 34 | output = py_x_ops.str_lower("Hello WORLD").eval() 35 | self.assertEqual(b'hello world', output) 36 | output = py_x_ops.str_lower(["Hello WORLD", "ABC XYZ"]).eval() 37 | self.assertAllEqual([b'hello world', b'abc xyz'], output) 38 | 39 | 40 | if __name__ == '__main__': 41 | tf.test.main() 42 | -------------------------------------------------------------------------------- /delta-serving/core/delta_serving_test.go: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | All rights reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ==============================================================================*/ 16 | package core 17 | 18 | import ( 19 | "github.com/stretchr/testify/assert" 20 | "net/http" 21 | "net/http/httptest" 22 | "testing" 23 | ) 24 | 25 | func TestDeltaServing(t *testing.T) { 26 | var deltaOptions = DeltaOptions{ 27 | true, 28 | "8004", 29 | "predict", 30 | "../dpl/output/conf/model.yaml", 31 | } 32 | r, err := DeltaListen(deltaOptions) 33 | assert.NoError(t, err) 34 | 35 | w := httptest.NewRecorder() 36 | req, _ := http.NewRequest("POST", "/v1/models/saved_model", nil) 37 | r.ServeHTTP(w, req) 38 | assert.Equal(t, 200, w.Code) 39 | 40 | } 41 | -------------------------------------------------------------------------------- /delta-serving/core/types/delta_types.go: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | All rights reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ==============================================================================*/ 16 | package types 17 | 18 | const ( 19 | Develop string = `develop` 20 | Test string = `test` 21 | Prod string = `prod` 22 | Online string = `online` 23 | ) 24 | 25 | const( 26 | DGRPC string = `d_grpc` 27 | DJson string = `d_json` 28 | DString string = `d_string` 29 | ) 30 | -------------------------------------------------------------------------------- /delta-serving/core/types/delta_types_test.go: -------------------------------------------------------------------------------- 1 | package types 2 | 3 | import ( 4 | "gopkg.in/go-playground/assert.v1" 5 | "testing" 6 | ) 7 | 8 | func TestDeltaTypes(t *testing.T) { 9 | dev := Develop 10 | assert.Equal(t, dev, "develop") 11 | } 12 | -------------------------------------------------------------------------------- /delta-serving/core/utils/filepath_util.go: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | All rights reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ==============================================================================*/ 16 | package utils 17 | 18 | import ( 19 | "delta/delta-serving/core/types" 20 | "path" 21 | "path/filepath" 22 | "runtime" 23 | ) 24 | 25 | func GetCurrentPath() string { 26 | _, filename, _, _ := runtime.Caller(1) 27 | return path.Dir(filename) 28 | } 29 | 30 | func GetProjectPath(profile string) string { 31 | if profile != types.Develop { 32 | filePath, _ := filepath.Abs(`.`) 33 | return filePath 34 | } 35 | currentPath := GetCurrentPath() 36 | return path.Join(currentPath, `../../`) 37 | } 38 | -------------------------------------------------------------------------------- /delta-serving/core/utils/filepath_util_test.go: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | All rights reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ==============================================================================*/ 16 | package utils 17 | 18 | import ( 19 | "github.com/stretchr/testify/assert" 20 | "testing" 21 | ) 22 | 23 | func TestGetProjectPath(t *testing.T) { 24 | pPath := GetProjectPath("./") 25 | assert.NotEqual(t, "", pPath) 26 | } 27 | -------------------------------------------------------------------------------- /delta-serving/examples/Dockerfile: -------------------------------------------------------------------------------- 1 | # Dockerfile References: https://docs.docker.com/engine/reference/builder/ 2 | 3 | # Start from the latest golang base image 4 | FROM golang:latest 5 | 6 | WORKDIR /go/delta 7 | 8 | # Build the Go app 9 | #RUN bash build.sh 10 | 11 | COPY ./output ./output 12 | 13 | # Expose port 8004 to the outside world 14 | EXPOSE 8004 15 | 16 | # Command to run the executable 17 | CMD ["pushd output/delta-service"] 18 | CMD ["./run.sh start &"] 19 | CMD ["popd"] 20 | -------------------------------------------------------------------------------- /delta-serving/examples/build.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | set -euf -o pipefail 3 | 4 | source server-env.sh 5 | 6 | pushd ../ 7 | if [ -d dpl ]; then 8 | rm -rf dpl 9 | fi 10 | popd 11 | 12 | delta_file="delta-service" 13 | 14 | if [ -f "$delta_file" ]; then 15 | rm -rf $delta_file 16 | fi 17 | 18 | if [ -d output ]; then 19 | rm -rf output 20 | fi 21 | 22 | mkdir -p output/$delta_file/log 23 | mkdir -p output/dpl/ 24 | 25 | cp -R ../../../dpl/output/ output/dpl/ 26 | 27 | cp -R ../mock_yaml/nlp/model.yaml output/dpl/output/model/saved_model/1/ 28 | 29 | if [ -d ../output ] 30 | then 31 | rm -rf ../output 32 | fi 33 | 34 | cp -R output/dpl ../ 35 | 36 | # build delta serving 37 | GOOS=linux GOARCH=amd64 go build -o $delta_file main.go 38 | 39 | # copy utils 40 | cp $delta_file output/$delta_file 41 | cp run.sh output/$delta_file 42 | cp server-env.sh output/$delta_file 43 | chmod +x output/$delta_file/run.sh 44 | -------------------------------------------------------------------------------- /delta-serving/examples/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source server-env.sh 4 | 5 | workspace=$(cd $(dirname $0) && pwd -P) 6 | cd $workspace 7 | 8 | app=delta-service 9 | conf="" 10 | 11 | action=$1 12 | case $action in 13 | "start" ) 14 | exec "./$app" -log_dir=./log -alsologtostderr=true -port "8004" -yaml "../dpl/output/model/saved_model/1/model.yaml" -type "predict" -debug true 15 | ;; 16 | * ) 17 | echo "usage: $0 [start]" 18 | ;; 19 | esac 20 | -------------------------------------------------------------------------------- /delta-serving/examples/server-env.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | 3 | export DELTANN_MAIN=../dpl/output/lib/deltann 4 | export DELTANN_OPS=../dpl/output/lib/custom_ops 5 | export DELTANN_TENSORFLOW=../dpl/output/lib/tensorflow 6 | 7 | LD_LIBRARY_PATH=$DELTANN_MAIN:$DELTANN_OPS:$DELTANN_TENSORFLOW:$LD_LIBRARY_PATH 8 | export LD_LIBRARY_PATH 9 | 10 | -------------------------------------------------------------------------------- /delta-serving/go.mod: -------------------------------------------------------------------------------- 1 | module delta/delta-serving 2 | 3 | go 1.12 4 | 5 | require ( 6 | github.com/gin-contrib/pprof v1.2.1 7 | github.com/gin-contrib/sse v0.1.0 // indirect 8 | github.com/gin-gonic/gin v1.4.0 9 | github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b 10 | github.com/json-iterator/go v1.1.7 // indirect 11 | github.com/kr/pretty v0.1.0 // indirect 12 | github.com/mattn/go-isatty v0.0.8 // indirect 13 | github.com/stretchr/testify v1.4.0 14 | github.com/ugorji/go v1.1.7 // indirect 15 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859 // indirect 16 | golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a // indirect 17 | gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect 18 | gopkg.in/go-playground/assert.v1 v1.2.1 19 | gopkg.in/yaml.v2 v2.2.2 20 | ) 21 | -------------------------------------------------------------------------------- /delta/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | import os 17 | 18 | PACKAGE_ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) 19 | -------------------------------------------------------------------------------- /delta/compat.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """The compatible tensorflow library.""" 16 | 17 | from tensorflow.compat.v1 import * # pylint:disable=wildcard-import 18 | 19 | # Import absl.flags and absl.logging to overwrite the Tensorflow ones. 20 | # This is the intended behavior in TF 2.0. 
21 | # pylint:disable=g-bad-import-order, unused-import, g-import-not-at-top 22 | from absl import flags 23 | from absl import logging 24 | # pylint: disable=g-direct-tensorflow-import 25 | from tensorflow.python.compat import v2_compat 26 | 27 | from tensorflow.python.framework import function 28 | # pylint: enable=g-direct-tensorflow-import 29 | 30 | v2_compat.disable_v2_behavior() 31 | Defun = function.Defun 32 | -------------------------------------------------------------------------------- /delta/compat_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Tests for compat.py.""" 16 | 17 | import delta.compat as tf 18 | from tensorflow.python.framework import function  # pylint:disable=g-direct-tensorflow-import 19 | 20 | 21 | class CompatTest(tf.test.TestCase):  # verifies delta.compat re-exports the TF v1 API plus absl flags/logging 22 | 23 | def testSomeTFSymbols(self): 24 | self.assertFalse(tf.executing_eagerly())  # delta.compat calls disable_v2_behavior() at import time 25 | self.assertIsNotNone(tf.logging) 26 | self.assertIsNotNone(tf.flags) 27 | self.assertIs(tf.Defun, function.Defun)  # re-exported object, not a copy 28 | 29 | 30 | if __name__ == '__main__': 31 | tf.test.main() 32 | -------------------------------------------------------------------------------- /delta/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | ''' init of data package''' 17 | -------------------------------------------------------------------------------- /delta/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved.
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | -------------------------------------------------------------------------------- /delta/data/datasets/atis2_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Tests for delta.data.datasets.atis2 (ATIS2 dataset builder).""" 16 | 17 | from absl import logging 18 | 19 | import delta.compat as tf 20 | from delta.data.datasets.atis2 import ATIS2 21 | 22 | 23 | class ATIS2Test(tf.test.TestCase): 24 | """data class test for nlu-joint task.""" 25 | 26 | def test_build(self): 27 | atis2 = ATIS2('/atis2')  # NOTE(review): absolute path '/atis2' — sibling tests use relative dirs; confirm intended 28 | atis2.build() 29 | self.assertTrue(atis2.is_ready()) 30 | 31 | 32 | if __name__ == '__main__': 33 | logging.set_verbosity(logging.DEBUG) 34 | tf.test.main() 35 | -------------------------------------------------------------------------------- /delta/data/datasets/atis_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ============================================================================== 15 | """Tests for delta.data.datasets.atis (ATIS dataset builder).""" 16 | 17 | from absl import logging 18 | 19 | import delta.compat as tf 20 | from delta.data.datasets.atis import ATIS 21 | 22 | 23 | class ATISTest(tf.test.TestCase): 24 | """atis data class for nlu joint task.""" 25 | 26 | def test_build(self): 27 | atis = ATIS('atis') 28 | atis.build() 29 | self.assertTrue(atis.is_ready()) 30 | 31 | 32 | if __name__ == '__main__': 33 | logging.set_verbosity(logging.DEBUG) 34 | tf.test.main() 35 | -------------------------------------------------------------------------------- /delta/data/datasets/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | # ============================================================================== 16 | 17 | from absl import logging 18 | from delta.utils.register import registers 19 | 20 | 21 | def build_dataset(dataset_name, dataset_dir): 22 | if dataset_name not in registers.dataset: 23 | logging.warning(f"Dataset: {dataset_name} not supported!") 24 | ds_cls = registers.dataset[dataset_name] 25 | ds_obj = ds_cls(dataset_dir) 26 | res = ds_obj.build() 27 | if not res: 28 | logging.info(f"Dataset: {dataset_name} built failed!") 29 | return 30 | logging.info(f"Dataset: {dataset_name} built successfully.") 31 | -------------------------------------------------------------------------------- /delta/data/datasets/conll_2003_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Tests for delta.data.datasets.conll_2003 (CONLL-2003 dataset builder).""" 16 | 17 | from absl import logging 18 | 19 | import delta.compat as tf 20 | from delta.data.datasets.conll_2003 import Conll2003 21 | 22 | 23 | class Conll2003Test(tf.test.TestCase): 24 | """conll2003 data class for seqlabel task.""" 25 | 26 | def test_build(self): 27 | conll_2003 = Conll2003('conll_2003') 28 | conll_2003.build() 29 | self.assertTrue(conll_2003.is_ready()) 30 | 31 | 32 | if __name__ == '__main__': 33 | logging.set_verbosity(logging.DEBUG) 34 | tf.test.main() 35 | -------------------------------------------------------------------------------- /delta/data/datasets/mock_text_cls_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ============================================================================== 15 | """Tests for delta.data.datasets.mock_text_cls_data.""" 16 | 17 | from absl import logging 18 | 19 | import delta.compat as tf 20 | from delta.data.datasets.mock_text_cls_data import MockTextCLSData 21 | 22 | 23 | class MockTextClsDataTest(tf.test.TestCase): 24 | """mock cls data class for cls task.""" 25 | 26 | def test_build(self): 27 | mock_text_cls_data = MockTextCLSData('mock_cls_data') 28 | mock_text_cls_data.build() 29 | self.assertTrue(mock_text_cls_data.is_ready()) 30 | 31 | 32 | if __name__ == '__main__': 33 | logging.set_verbosity(logging.DEBUG) 34 | tf.test.main() 35 | -------------------------------------------------------------------------------- /delta/data/datasets/mock_text_match_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ============================================================================== 15 | """Tests for delta.data.datasets.mock_text_match_data.""" 16 | 17 | from absl import logging 18 | 19 | import delta.compat as tf 20 | from delta.data.datasets.mock_text_match_data import MockTextMatchData 21 | 22 | 23 | class MockTextMatchDataTest(tf.test.TestCase): 24 | """mock data class test for match task.""" 25 | 26 | def test_build(self): 27 | mock_text_match_data = MockTextMatchData('mock_match_data') 28 | mock_text_match_data.build() 29 | self.assertTrue(mock_text_match_data.is_ready()) 30 | 31 | 32 | if __name__ == '__main__': 33 | logging.set_verbosity(logging.DEBUG) 34 | tf.test.main() 35 | -------------------------------------------------------------------------------- /delta/data/datasets/mock_text_nlu_joint_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ============================================================================== 15 | """Tests for delta.data.datasets.mock_text_nlu_joint_data.""" 16 | 17 | from absl import logging 18 | 19 | import delta.compat as tf 20 | from delta.data.datasets.mock_text_nlu_joint_data import MockTextNLUJointData 21 | 22 | 23 | class MockTextNLUJointDataTest(tf.test.TestCase): 24 | """mock data class test for nlu-joint task.""" 25 | 26 | def test_build(self): 27 | mock_text_nlu_joint_data = MockTextNLUJointData('mock_nlu_joint_data') 28 | mock_text_nlu_joint_data.build() 29 | self.assertTrue(mock_text_nlu_joint_data.is_ready()) 30 | 31 | 32 | if __name__ == '__main__': 33 | logging.set_verbosity(logging.DEBUG) 34 | tf.test.main() 35 | -------------------------------------------------------------------------------- /delta/data/datasets/mock_text_seq2seq_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ============================================================================== 15 | """Tests for delta.data.datasets.mock_text_seq2seq_data.""" 16 | 17 | from absl import logging 18 | 19 | import delta.compat as tf 20 | from delta.data.datasets.mock_text_seq2seq_data import MockTextSeq2SeqData 21 | 22 | 23 | class MockTextSeq2SeqDataTest(tf.test.TestCase): 24 | """data class test for seq2seq task.""" 25 | 26 | def test_build(self): 27 | mock_text_seq2seq_data = MockTextSeq2SeqData('mock_seq2seq_data') 28 | mock_text_seq2seq_data.build() 29 | self.assertTrue(mock_text_seq2seq_data.is_ready()) 30 | 31 | 32 | if __name__ == '__main__': 33 | logging.set_verbosity(logging.DEBUG) 34 | tf.test.main() 35 | -------------------------------------------------------------------------------- /delta/data/datasets/mock_text_seq_label_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ============================================================================== 15 | """Tests for delta.data.datasets.mock_text_seq_label_data.""" 16 | 17 | from absl import logging 18 | 19 | import delta.compat as tf 20 | from delta.data.datasets.mock_text_seq_label_data import MockTextSeqLabelData 21 | 22 | 23 | class MockTextSeqLabelDataTest(tf.test.TestCase): 24 | """mock data class test for seqlabel task.""" 25 | 26 | def test_build(self): 27 | mock_text_seq_label_data = MockTextSeqLabelData('mock_seq_label_data') 28 | mock_text_seq_label_data.build() 29 | self.assertTrue(mock_text_seq_label_data.is_ready()) 30 | 31 | 32 | if __name__ == '__main__': 33 | logging.set_verbosity(logging.DEBUG) 34 | tf.test.main() 35 | -------------------------------------------------------------------------------- /delta/data/datasets/snli_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ============================================================================== 15 | """Tests for delta.data.datasets.snli (SNLI dataset builder).""" 16 | 17 | from absl import logging 18 | 19 | import delta.compat as tf 20 | from delta.data.datasets.snli import SNLI 21 | 22 | 23 | class SNLITest(tf.test.TestCase): 24 | """snli data class for match task.""" 25 | 26 | def test_build(self): 27 | snli = SNLI('snli') 28 | snli.build() 29 | self.assertTrue(snli.is_ready()) 30 | 31 | 32 | if __name__ == '__main__': 33 | logging.set_verbosity(logging.DEBUG) 34 | tf.test.main() 35 | -------------------------------------------------------------------------------- /delta/data/datasets/trec_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ============================================================================== 15 | """Tests for delta.data.datasets.trec (TREC dataset builder).""" 16 | 17 | from absl import logging 18 | 19 | import delta.compat as tf 20 | from delta.data.datasets.trec import TREC 21 | 22 | 23 | class TRECTest(tf.test.TestCase): 24 | """trec data class for cls task.""" 25 | 26 | def test_build(self): 27 | trec = TREC('trec') 28 | trec.build() 29 | self.assertTrue(trec.is_ready()) 30 | 31 | 32 | if __name__ == '__main__': 33 | logging.set_verbosity(logging.DEBUG) 34 | tf.test.main() 35 | -------------------------------------------------------------------------------- /delta/data/datasets/yahoo_answer_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ============================================================================== 15 | """Tests for delta.data.datasets.yahoo_answer (YahooAnswer dataset builder).""" 16 | 17 | from absl import logging 18 | 19 | import delta.compat as tf 20 | from delta.data.datasets.yahoo_answer import YahooAnswer 21 | 22 | 23 | class YahooAnswerTest(tf.test.TestCase): 24 | """yahoo answer data class test for cls task.""" 25 | 26 | def test_build(self): 27 | yahoo_answer = YahooAnswer('yahoo_answer') 28 | yahoo_answer.build() 29 | self.assertTrue(yahoo_answer.is_ready()) 30 | 31 | 32 | if __name__ == '__main__': 33 | logging.set_verbosity(logging.DEBUG) 34 | tf.test.main() 35 | -------------------------------------------------------------------------------- /delta/data/feat/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/delta/data/feat/.gitkeep -------------------------------------------------------------------------------- /delta/data/feat/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | # ============================================================================== 16 | ''' speech feature ''' 17 | from delta.data.feat import speech_ops 18 | 19 | from .speech_feature import load_wav 20 | from .speech_feature import extract_feature 21 | from .speech_feature import add_delta_delta 22 | 23 | # numpy 24 | from .speech_feature import extract_fbank 25 | from .speech_feature import delta_delta 26 | from .speech_feature import fbank_feat 27 | from .speech_feature import powspec_feat 28 | from .speech_feature import extract_feat 29 | -------------------------------------------------------------------------------- /delta/data/feat/python_speech_features/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | ''' Speech feature extractor. 
''' 17 | from .base import * 18 | -------------------------------------------------------------------------------- /delta/data/feat/python_speech_features/english.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/delta/data/feat/python_speech_features/english.wav -------------------------------------------------------------------------------- /delta/data/feat/python_speech_features/example.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ============================================================================== 16 | 17 | #!/usr/bin/env python3  # NOTE(review): shebang is not on line 1, so it has no effect 18 | ''' Example for sigproc.py ''' 19 | 20 | # pylint: skip-file 21 | 22 | import scipy.io.wavfile as wav 23 | 24 | from base import mfcc 25 | from base import delta 26 | from base import logfbank 27 | 28 | if __name__ == '__main__': 29 | (rate, sig) = wav.read("english.wav")  # rate in Hz, sig as raw sample array 30 | mfcc_feat = mfcc(sig, rate)  # MFCC features per frame 31 | d_mfcc_feat = delta(mfcc_feat, 2)  # delta features over a 2-frame window 32 | fbank_feat = logfbank(sig, rate)  # log Mel filterbank energies 33 | 34 | print(fbank_feat[1:3, :])  # print two frames as a smoke check 35 | -------------------------------------------------------------------------------- /delta/data/feat/python_speech_features/test/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | -------------------------------------------------------------------------------- /delta/data/frontend/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved.
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | ''' init of frontend package''' 17 | -------------------------------------------------------------------------------- /delta/data/preprocess/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ============================================================================== 16 | """Init of preprocess""" 17 | -------------------------------------------------------------------------------- /delta/data/task/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | -------------------------------------------------------------------------------- /delta/data/task/base_speech_task.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | ''' Base Speech Task''' 17 | from delta import utils 18 | from delta.data import utils as data_utils 19 | from delta.data.task.base_task import WavSpeechTask 20 | 21 | #pylint: disable=abstract-method 22 | 23 | 24 | class SpeechTask(WavSpeechTask): 25 | ''' base class for speech task''' 26 | 27 | def __init__(self, config, mode): 28 | super().__init__(config) 29 | assert mode in (utils.TRAIN, utils.EVAL, utils.INFER)  # fail fast on unsupported mode 30 | self._mode = mode 31 | 32 | @property 33 | def mode(self): 34 | return self._mode  # read-only mode fixed at construction 35 | 36 | #pylint: disable=arguments-differ 37 | def input_fn(self, mode, batch_size, num_epoch=None): 38 | ''' estimator input_fn'''  # NOTE(review): self.dataset is not set in this class — presumably provided by WavSpeechTask or a subclass; confirm 39 | return data_utils.input_fn(self.dataset, mode, batch_size, num_epoch) 40 | -------------------------------------------------------------------------------- /delta/data/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | # ============================================================================== 16 | """Utilities for data related operations.""" 17 | from delta.data.utils.common_utils import * 18 | -------------------------------------------------------------------------------- /delta/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ============================================================================== 16 | """Custom layers.""" 17 | 18 | from delta.layers.attention import HanAttention 19 | from delta.layers.recurrent import RnnAttentionEncoder 20 | from delta.layers.attention import MatchAttention 21 | from delta.layers.recurrent import RnnEncoder 22 | from delta.layers.recurrent import RnnDecoder 23 | from delta.layers.sub_tf import MultiHeadAttention 24 | from delta.layers.sub_tf import PositionEmbedding 25 | from delta.layers.sub_tf import PositionwiseFeedForward 26 | from delta.layers.transformer import TransformerEncoder 27 | from delta.layers.transformer import TransformerDecoder 28 | 29 | from delta.layers.common_layers import * 30 | -------------------------------------------------------------------------------- /delta/layers/base_layer.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ============================================================================== 16 | """Base class for layer.""" 17 | 18 | import delta.compat as tf 19 | 20 | 21 | class Layer(tf.keras.layers.Layer): 22 | """Base class for layer.""" 23 | 24 | def __init__(self, **kwargs): 25 | super().__init__(**kwargs) 26 | 27 | def build(self, input_shape): 28 | """Creates the variables of the layer.""" 29 | #pylint: disable=useless-super-delegation 30 | super().build(input_shape) 31 | 32 | def call(self, inputs, training=None, mask=None): 33 | """This is where the layer's logic lives.""" 34 | # pylint: disable=arguments-differ 35 | raise NotImplementedError() 36 | -------------------------------------------------------------------------------- /delta/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ============================================================================== 16 | """Custom models.""" 17 | -------------------------------------------------------------------------------- /delta/resources/cppjieba_dict: -------------------------------------------------------------------------------- 1 | ../../tools/cppjieba/dict -------------------------------------------------------------------------------- /delta/serving/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | ''' delta.serving ''' 17 | -------------------------------------------------------------------------------- /delta/utils/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/delta/utils/.gitkeep -------------------------------------------------------------------------------- /delta/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | ''' utils module ''' 17 | from delta.utils.cmvn import * 18 | from delta.utils.misc import * 19 | from delta.utils.plot import * 20 | from delta.utils.model import * 21 | from delta.utils.config import * 22 | from delta.utils.logger import * 23 | -------------------------------------------------------------------------------- /delta/utils/decode/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/delta/utils/decode/.gitkeep -------------------------------------------------------------------------------- /delta/utils/kaldi/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | """ Utils for Kaldi data IO. """ 17 | -------------------------------------------------------------------------------- /delta/utils/loss/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | ''' loss module ''' 17 | -------------------------------------------------------------------------------- /delta/utils/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | ''' metrics utils ''' 17 | from delta.utils.metrics.py_metrics import * 18 | from delta.utils.metrics.tf_metrics import * 19 | from delta.utils.metrics.metric_utils import * 20 | -------------------------------------------------------------------------------- /delta/utils/optimizer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ============================================================================== 16 | ''' init of optimizer''' 17 | from .yellowfin import YFOptimizer 18 | -------------------------------------------------------------------------------- /delta/utils/postprocess/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | ''' init of postprocess''' 17 | -------------------------------------------------------------------------------- /delta/utils/postprocess/base_postproc.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | ''' postprocess abstract class ''' 17 | import abc 18 | 19 | 20 | #pylint: disable=too-few-public-methods 21 | class PostProcABC(metaclass=abc.ABCMeta): 22 | ''' postprocess abstract class''' 23 | 24 | def __init__(self, config): 25 | pass 26 | 27 | @abc.abstractmethod 28 | def call(self): 29 | ''' implementation func ''' 30 | raise NotImplementedError() 31 | 32 | 33 | #pylint: disable=abstract-method 34 | class PostProc(PostProcABC): 35 | ''' base class of postprocess class''' 36 | 37 | def __init__(self, config): 38 | super().__init__(config) 39 | self.config = config 40 | 41 | def __call__(self, *args, **kwargs): 42 | return self.call(*args, **kwargs) 43 | -------------------------------------------------------------------------------- /delta/utils/postprocess/postprocess_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ============================================================================== 16 | ''' postprocess utils ''' 17 | from absl import logging 18 | from delta.data.preprocess.utils import load_vocab_dict 19 | 20 | 21 | def ids_to_sentences(ids, vocab_file_path): 22 | """ 23 | transform array of numbers to array of tags/words 24 | ids: [[1,2],[3,4]...] 25 | """ 26 | 27 | vocab_dict = load_vocab_dict(vocab_file_path) 28 | id_to_vocab = {int(v): k for k, v in vocab_dict.items()} 29 | 30 | sentences = [] 31 | for sent in ids: 32 | sent_char = [] 33 | for s_char in sent: 34 | if s_char not in id_to_vocab: 35 | logging.error("label not in vocabs") 36 | else: 37 | sent_char.append(id_to_vocab[s_char]) 38 | sentences.append(sent_char) 39 | assert len(sentences) == len(ids) 40 | return sentences 41 | -------------------------------------------------------------------------------- /delta/utils/solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ============================================================================== 16 | """Solvers.""" 17 | -------------------------------------------------------------------------------- /delta/utils/solver/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/delta/utils/solver/utils/__init__.py -------------------------------------------------------------------------------- /delta/utils/textgrid/gen_segments.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | if [[ $# != 1 ]];then 5 | echo "usage: $0 dataset_path" 6 | exit 1 7 | fi 8 | 9 | data_path=$1 10 | 11 | find $data_path -name '*.TextGrid' > textgrid.list 12 | 13 | 14 | python3 generate_segment_from_textgrid.py textgrid.list textgrid.segments 15 | 16 | cp textgrid.segments $data_path 17 | -------------------------------------------------------------------------------- /deltann/README.md: -------------------------------------------------------------------------------- 1 | # DETLA-NN 2 | 3 | ```bash 4 | total 104 5 | -rw-rw-r-- 1 60847 60847 3541 Jul 7 01:24 Makefile 6 | -rw-r--r-- 1 root root 0 Jul 7 07:55 README.md 7 | drwxrwxr-x 2 60847 60847 47 Jul 7 01:24 api/ 8 | -rwxrwxr-x 1 60847 60847 405 Jul 7 01:24 build.sh* 9 | drwxrwxr-x 3 60847 60847 4096 Jul 7 07:52 core/ 10 | -rw-rw-r-- 1 60847 60847 46 Jul 7 01:24 deltann_version_script.lds 11 | lrwxrwxrwx 1 60847 60847 7 Jul 7 01:24 run.sh -> test.sh* 12 | drwxrwxr-x 2 60847 60847 97 Jul 7 01:24 targets/ 13 | drwxrwxr-x 2 60847 60847 28 Jul 7 04:34 test/ 14 | -rwxrwxr-x 1 60847 60847 726 Jul 7 01:24 test.sh* 15 | 16 | drwxrwxr-x 4 60847 60847 85 Jul 7 01:24 examples/ 17 | 18 | drwxrwxr-x 5 60847 60847 118 Jul 7 01:24 server/ 19 | ``` 20 | 21 | `api`, `core`, `targets`, `test`, `deltann_version_script.lds` are src for `deltann` 22 | 23 | `examples` are some 
demos using `deltann` 24 | 25 | `server` are RESTful API for `deltann` wrapper by golang 26 | -------------------------------------------------------------------------------- /deltann/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ $# != 3 ];then 4 | echo "usage: $0 [linux|android|ios] [x86_64|arm|arm64] [tf|tflite|tfserving]" 5 | echo "now only support [linux] [x86_64] [TF|TFLITE|TFTRT|TFSERVING]" 6 | exit 1 7 | fi 8 | 9 | platform=$1 10 | arch=$2 11 | engine=$3 12 | 13 | #TARGET=$platform TARGET_ARCH=$arch ENGINE=$engine make -f Makefile -e 14 | export TARGET=$platform 15 | export TARGET_ARCH=$arch 16 | export ENGINE=$engine 17 | 18 | make clean 19 | make -f Makefile -e 20 | -------------------------------------------------------------------------------- /deltann/core/base_model.cc: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | All rights reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ==============================================================================*/ 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #include "core/base_model.h" 23 | 24 | namespace delta { 25 | namespace core { 26 | 27 | using std::string; 28 | 29 | BaseModel::BaseModel(ModelMeta model_meta, int num_threads) 30 | : _model_meta(model_meta), _num_threads(num_threads) {} 31 | 32 | BaseModel::~BaseModel() {} 33 | 34 | } // namespace core 35 | } // namespace delta 36 | -------------------------------------------------------------------------------- /deltann/core/shape_test.cc: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | All rights reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ==============================================================================*/ 16 | 17 | #include "core/shape.h" 18 | #include 19 | 20 | using delta::core::Shape; 21 | 22 | TEST(ShapeTest, Construct) { 23 | std::vector v({1, 2, 3}); 24 | Shape s(v); 25 | EXPECT_EQ(s.ndim(), v.size()); 26 | } 27 | -------------------------------------------------------------------------------- /deltann/core/utils/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/deltann/core/utils/.gitkeep -------------------------------------------------------------------------------- /deltann/core/utils/dynload/dynamic_loader.h: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | All rights reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ==============================================================================*/ 16 | 17 | #ifndef DYNLOAD_DYNAMIC_LOADER_H 18 | #define DYNLOAD_DYNAMIC_LOADER_H 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | namespace inference { 25 | 26 | /** 27 | * @brief load the DSO of tf_feature_extraction 28 | * 29 | * @param dso_handle dso handler 30 | * 31 | **/ 32 | void get_feature_extraction_dsohandle(void** dso_handle); 33 | 34 | } // namespace inference 35 | 36 | #endif // DYNLOAD_DYNAMIC_LOADER_H 37 | -------------------------------------------------------------------------------- /deltann/core/utils/dynload/feature_extraction_wrapper.cc: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | All rights reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ==============================================================================*/ 16 | 17 | #include "feature_extraction_wrapper.h" 18 | 19 | namespace inference { 20 | 21 | namespace dynload { 22 | 23 | std::once_flag feature_dso_flag; 24 | void *feature_dso_handle = nullptr; 25 | 26 | #define DEFINE_WRAP(__name) struct dynload__##__name __name 27 | 28 | FEATURE_EXTRACTION_ROUTINE_EACH(DEFINE_WRAP); 29 | 30 | } // namespace dynload 31 | 32 | } // namespace inference 33 | -------------------------------------------------------------------------------- /deltann/deltann_version_script.lds: -------------------------------------------------------------------------------- 1 | Delatnn_1.0 { 2 | global: *Delta*; 3 | local: *; 4 | }; 5 | 6 | -------------------------------------------------------------------------------- /deltann/examples/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/deltann/examples/.gitkeep -------------------------------------------------------------------------------- /deltann/examples/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ============================================================================== 16 | 17 | CUR_DIR := $(shell pwd) 18 | EXC_DIR := "" 19 | 20 | SUB_DIR := $(shell find . -maxdepth 1 -type d) 21 | SUB_DIR := $(basename $(patsubst ./%,%,$(SUB_DIR))) 22 | SUB_DIR := $(filter-out $(EXC_DIR),$(SUB_DIR)) 23 | 24 | MAKELIST = @for subdir in $(SUB_DIR); \ 25 | do \ 26 | $(MAKE) -C $(CUR_DIR)/$$subdir; \ 27 | done 28 | 29 | CLEANLIST = @for subdir in $(SUB_DIR); \ 30 | do \ 31 | $(MAKE) -C $(CUR_DIR)/$$subdir clean; \ 32 | done 33 | 34 | all: 35 | $(MAKELIST) 36 | 37 | clean: 38 | $(CLEANLIST) 39 | -------------------------------------------------------------------------------- /deltann/examples/speaker/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ============================================================================== 16 | 17 | CXXFLAGS := -std=c++11 -fPIC -DINFERENCE_VERSION=\"$(shell git rev-parse --short HEAD)\" -DNDEBUG -O2 18 | 19 | ABSEIL := $(ROOT_DIR)/../tools/abseil-cpp 20 | 21 | INCLUDES := -I$(ROOT_DIR) -I$(JSONCPP) -I$(ABSEIL) 22 | 23 | TEST_SRC := test.cc 24 | TEST_BIN := test.bin 25 | 26 | $(TEST_BIN): $(TEST_SRC) 27 | $(CXX) $(CXXFLAGS) $(INCLUDES) $(TEST_SRC) -o $(TEST_BIN) -L $(LIBDIR) -ldeltann $(LIBS) 28 | 29 | all: $(TEST_BIN) 30 | 31 | clean: 32 | $(RM) $(TEST_BIN) 33 | -------------------------------------------------------------------------------- /deltann/examples/text_cls/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ============================================================================== 16 | 17 | CXXFLAGS := -std=c++11 -fPIC -DINFERENCE_VERSION=\"$(shell git rev-parse --short HEAD)\" -DNDEBUG -O2 18 | 19 | ABSEIL := $(ROOT_DIR)/../tools/abseil-cpp 20 | 21 | INCLUDES := -I$(ROOT_DIR) -I$(JSONCPP) -I$(ABSEIL) 22 | 23 | TEST_SRC := test.cc 24 | TEST_BIN := test.bin 25 | 26 | $(TEST_BIN): $(TEST_SRC) 27 | $(CXX) $(CXXFLAGS) $(INCLUDES) $(TEST_SRC) -o $(TEST_BIN) -L $(LIBDIR) -ldeltann $(LIBS) 28 | 29 | all: $(TEST_BIN) 30 | 31 | clean: 32 | $(RM) $(TEST_BIN) 33 | -------------------------------------------------------------------------------- /deltann/examples/text_conf_json/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ============================================================================== 16 | 17 | CXXFLAGS := -std=c++11 -fPIC -DINFERENCE_VERSION=\"$(shell git rev-parse --short HEAD)\" -DNDEBUG -O2 18 | 19 | ABSEIL := $(ROOT_DIR)/../tools/abseil-cpp 20 | JSONCPP :=$(ROOT_DIR)/../tools/jsoncpp/include/ 21 | 22 | INCLUDES := -I$(ROOT_DIR) -I$(JSONCPP) -I$(ABSEIL) 23 | 24 | JSONLIBS := $(ROOT_DIR)/../tools/jsoncpp/build-release_shared_g++ 25 | 26 | TEST_SRC := test_conf_json.cc 27 | TEST_BIN := test.bin 28 | 29 | $(TEST_BIN): $(TEST_SRC) 30 | $(CXX) $(CXXFLAGS) $(INCLUDES) $(TEST_SRC) -o $(TEST_BIN) -L $(LIBDIR) -L $(JSONLIBS) -ldeltann $(LIBS) 31 | 32 | all: $(TEST_BIN) 33 | 34 | clean: 35 | $(RM) $(TEST_BIN) 36 | -------------------------------------------------------------------------------- /deltann/run.sh: -------------------------------------------------------------------------------- 1 | test.sh -------------------------------------------------------------------------------- /deltann/targets/ios_makefile.inc: -------------------------------------------------------------------------------- 1 | # Settings for IOS. 
2 | ifeq ($(TARGET), ios) 3 | BUILD_FOR_IOS_SIMULATOR := false 4 | ifeq ($(TARGET_ARCH), x86_64) 5 | BUILD_FOR_IOS_SIMULATOR := true 6 | endif 7 | ifeq ($(TARGET_ARCH), i386) 8 | BUILD_FOR_IOS_SIMULATOR := true 9 | CXXFLAGS += -Dthread_local= 10 | endif 11 | ifeq ($(BUILD_FOR_IOS_SIMULATOR), true) 12 | IPHONEOS_PLATFORM := $(shell xcrun --sdk iphonesimulator \ 13 | --show-sdk-platform-path) 14 | IPHONEOS_SYSROOT := $(shell xcrun --sdk iphonesimulator \ 15 | --show-sdk-path) 16 | else 17 | IPHONEOS_PLATFORM := $(shell xcrun --sdk iphoneos --show-sdk-platform-path) 18 | IPHONEOS_SYSROOT := $(shell xcrun --sdk iphoneos --show-sdk-path) 19 | endif 20 | IOS_SDK_VERSION := $(shell xcrun --sdk iphoneos --show-sdk-version) 21 | MIN_SDK_VERSION := 8.0 22 | CXX := g++ 23 | CC := gcc 24 | AR := ar 25 | 26 | CXXFLAGS += -std=c++11 -miphoneos-version-min=${MIN_SDK_VERSION} \ 27 | -fembed-bitcode \ 28 | -Wno-c++11-narrowing \ 29 | -mno-thumb \ 30 | -isysroot ${IPHONEOS_SYSROOT} \ 31 | -arch $(TARGET_ARCH) \ 32 | -O3 33 | CCFLAGS += -miphoneos-version-min=${MIN_SDK_VERSION} \ 34 | -isysroot ${IPHONEOS_SYSROOT} \ 35 | -fembed-bitcode \ 36 | -arch $(TARGET_ARCH) \ 37 | -O3 38 | LDFLAGS += -fembed-bitcode \ 39 | -miphoneos-version-min=${MIN_SDK_VERSION} \ 40 | -arch ${TARGET_ARCH} \ 41 | -framework Accelerate #-stdlib=libc++ -lc++ -lc++abi 42 | 43 | endif 44 | -------------------------------------------------------------------------------- /deltann/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ $# -lt 1 ];then 4 | echo "usage: $0 [speaker|text_cls|dir_name_under_examples] [memcheck]" 5 | exit 1 6 | fi 7 | 8 | set -e 9 | 10 | make examples 11 | 12 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD/../dpl/lib/tensorflow:$PWD/../dpl/lib/deltann/ 13 | #export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD/../dpl/lib/tensorflow:$PWD/.gen/lib 14 | 15 | VARGRIND= 16 | if [ $2 == "memcheck" ];then 17 | VARGRIND="valgrind 
--leak-check=full --log-file=valgrind.log --show-leak-kinds=all" 18 | fi 19 | 20 | 21 | case $1 in 22 | text_cls) 23 | echo "text_cls" 24 | $VARGRIND ./examples/text_cls/test.bin examples/text_cls/model.yaml 25 | ;; 26 | speaker) 27 | echo "speaker" 28 | $VARGRIND ./examples/speaker/test.bin examples/speaker/model.yaml 29 | ;; 30 | *) 31 | echo "Error param: $1" 32 | exit 1 33 | ;; 34 | esac 35 | 36 | exit 0 37 | -------------------------------------------------------------------------------- /docker/dockerfile.ci.cpu: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:2.3.0 2 | COPY sources.list.ubuntu18.04 /etc/apt/sources.list 3 | 4 | # install tools 5 | COPY install.sh /install.sh 6 | RUN /bin/bash /install.sh 7 | 8 | COPY requirements.txt /ci/requirements.txt 9 | WORKDIR /ci 10 | RUN sudo pip --no-cache-dir install -i https://mirrors.aliyun.com/pypi/simple --upgrade pip && pip --no-cache-dir install -i https://mirrors.aliyun.com/pypi/simple --user -r requirements.txt 11 | 12 | CMD ["/bin/bash", "-c"] 13 | -------------------------------------------------------------------------------- /docker/dockerfile.delta.cpu: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:2.3.0 2 | COPY sources.list.ubuntu18.04 /etc/apt/sources.list 3 | 4 | # install tools 5 | COPY install.sh /install.sh 6 | RUN /bin/bash /install.sh 7 | 8 | RUN sudo mkdir workspace 9 | RUN cd /workspace && git clone --depth 1 https://github.com/didi/delta.git 10 | RUN cd /workspace/delta/tools && make basic 11 | WORKDIR /workspace/delta 12 | 13 | CMD ["/bin/bash", "-c"] 14 | -------------------------------------------------------------------------------- /docker/dockerfile.delta.gpu: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:2.3.0-gpu 2 | COPY sources.list.ubuntu18.04 /etc/apt/sources.list 3 | 4 | # install 
tools 5 | COPY install.sh /install.sh 6 | RUN /bin/bash /install.sh 7 | 8 | RUN sudo mkdir workspace 9 | RUN cd /workspace && git clone --depth 1 https://github.com/didi/delta.git 10 | RUN cd /workspace/delta/tools && make basic 11 | WORKDIR /workspace/delta 12 | 13 | CMD ["/bin/bash", "-c"] 14 | -------------------------------------------------------------------------------- /docker/dockerfile.deltann.cpu: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:devel 2 | COPY sources.list.ubuntu18.04 /etc/apt/sources.list 3 | 4 | # install tools 5 | COPY install.sh /install.sh 6 | RUN /bin/bash /install.sh 7 | 8 | CMD ["/bin/bash", "-c"] 9 | -------------------------------------------------------------------------------- /docker/dockerfile.deltann.gpu: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:devel-gpu 2 | COPY sources.list.ubuntu18.04 /etc/apt/sources.list 3 | 4 | # install tools 5 | COPY install.sh /install.sh 6 | RUN /bin/bash /install.sh 7 | 8 | CMD ["/bin/bash", "-c"] 9 | -------------------------------------------------------------------------------- /docker/gen_dockerfile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | rm dockerfile.delta* 4 | 5 | bash build.sh ci cpu dockerfile 6 | bash build.sh delta cpu dockerfile 7 | bash build.sh delta gpu dockerfile 8 | bash build.sh deltann cpu dockerfile 9 | bash build.sh deltann gpu dockerfile 10 | -------------------------------------------------------------------------------- /docker/install.sh: -------------------------------------------------------------------------------- 1 | apt-get update && apt-get install -y --no-install-recommends \ 2 | autoconf\ 3 | automake \ 4 | clang-format \ 5 | curl \ 6 | git \ 7 | libtool \ 8 | sudo \ 9 | sox \ 10 | tig \ 11 | make \ 12 | vim \ 13 | zlib1g-dev \ 14 | wget \ 15 | subversion \ 16 | 
ca-certificates \ 17 | unzip \ 18 | patch \ 19 | ffmpeg \ 20 | && \ 21 | apt-get clean && \ 22 | rm -rf /var/lib/apt/lists/* 23 | -------------------------------------------------------------------------------- /docker/install_user.sh: -------------------------------------------------------------------------------- 1 | # fix for tf1.14 docker 2 | # issue https://github.com/tensorflow/tensorflow/issues/29951 3 | sudo apt-get update && sudo apt-get install -y --no-install-recommends gcc-4.8 g++-4.8 && \ 4 | sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-4.8 100 && \ 5 | sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-4.8 100 && \ 6 | sudo update-alternatives --config gcc && \ 7 | sudo update-alternatives --config g++ && \ 8 | sudo apt-get clean && \ 9 | sudo rm -rf /var/lib/apt/lists/* 10 | 11 | -------------------------------------------------------------------------------- /docker/requirements.txt: -------------------------------------------------------------------------------- 1 | # When update this file, 2 | # please using `pushd docker && bash run.sh && popd` 3 | # to generate dockerfiles for DELTA 4 | 5 | matplotlib 6 | sklearn 7 | pandas 8 | librosa>=0.7.2 9 | numba==0.48.0 #must be this by librosa>=0.7.2 10 | absl-py 11 | jieba 12 | wget 13 | kaldiio 14 | soundfile 15 | textgrid 16 | pyyaml 17 | yapf 18 | gensim 19 | hurry.filesize 20 | imbalanced-learn 21 | sphinx 22 | sphinx_rtd_theme 23 | recommonmark 24 | pylint 25 | cpplint 26 | seqeval 27 | rouge 28 | sacrebleu 29 | pyAudioAnalysis 30 | flake8 31 | sentencepiece 32 | deepdiff 33 | tensorflow-addons==0.11.1 34 | tensorflow-model-optimization 35 | 36 | 37 | # TODO: rm below 38 | # https://github.com/tensorflow/addons/issues/864 39 | tensorflow-cpu==2.3.0 40 | -------------------------------------------------------------------------------- /docker/sources.list.ubuntu18.04: -------------------------------------------------------------------------------- 1 | # 
https://mirrors.tuna.tsinghua.edu.cn/help/ubuntu/ 2 | # 默认注释了源码镜像以提高 apt update 速度,如有需要可自行取消注释 3 | deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic main restricted universe multiverse 4 | # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic main restricted universe multiverse 5 | deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-updates main restricted universe multiverse 6 | # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-updates main restricted universe multiverse 7 | deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-backports main restricted universe multiverse 8 | # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-backports main restricted universe multiverse 9 | deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-security main restricted universe multiverse 10 | # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-security main restricted universe multiverse 11 | 12 | # 预发布软件源,不建议启用 13 | # deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-proposed main restricted universe multiverse 14 | # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-proposed main restricted universe multiverse 15 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = DELTA 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/_static/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/docs/_static/.gitkeep -------------------------------------------------------------------------------- /docs/_static/delta_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/docs/_static/delta_logo.png -------------------------------------------------------------------------------- /docs/_templates/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/docs/_templates/.gitkeep -------------------------------------------------------------------------------- /docs/development/adding_op.md: -------------------------------------------------------------------------------- 1 | # Adding Tensorflow Op 2 | 3 | All `custom-op` implementations are under the `delta/layers/ops/` directory. 4 | 5 | 6 | ## Eigen Tensor 7 | 8 | [Eigen Tensor](https://github.com/eigenteam/eigen-git-mirror/blob/master/unsupported/Eigen/CXX11/src/Tensor/README.md) 9 | is an unsupported Eigen package, which is the foundation of 10 | [Tensorflow Tensor](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor.h). 11 | 12 | 13 | ## Implement Op Kernel 14 | 15 | Implement your op kernel class for underlying computing. 16 | 17 | 18 | ## Create Tensorflow Op Wrapper 19 | 20 | Wrap the op kernel with a Tensorflow Op or Tensorflow Lite Op.
21 | 22 | 23 | ## Tensorflow 24 | 25 | * [Guide for New Op](https://www.tensorflow.org/guide/extend/op) 26 | * [shape inference](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/shape_inference.h) 27 | 28 | 29 | ## Tensorflow-Lite 30 | 31 | * [TFLite custom ops](https://www.tensorflow.org/lite/guide/ops_custom). 32 | * [TFLite select ops](https://www.tensorflow.org/lite/guide/ops_select) 33 | 34 | 35 | ## References 36 | - [custom-op](https://github.com/tensorflow/custom-op) 37 | - [lingvo](https://github.com/tensorflow/lingvo/tree/master/lingvo/core/ops) 38 | - [Tensorflow Ops](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/kernels) 39 | - [Tensorflow Lite Ops](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/kernels) 40 | -------------------------------------------------------------------------------- /docs/development/model_optimization.md: -------------------------------------------------------------------------------- 1 | # [Model Optimization](https://github.com/tensorflow/model-optimization) 2 | 3 | ## Quantization 4 | 5 | * [Quantization-aware training](https://github.com/tensorflow/tensorflow/tree/r1.13/tensorflow/contrib/quantize) 6 | * [Post-training quantization](https://www.tensorflow.org/lite/performance/post_training_quantization) 7 | 8 | ## Pruning 9 | 10 | ## Compression 11 | 12 | -------------------------------------------------------------------------------- /docs/development/serving.md: -------------------------------------------------------------------------------- 1 | # Serving 2 | 3 | ## TF-Serving 4 | 5 | ### [Install](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/g3doc/setup.md) 6 | 7 | ### [Developing with Docker](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/g3doc/building_with_docker.md) 8 | 9 | ### [Pack your model into 
docker](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/g3doc/docker.md#creating-your-own-serving-image) 10 | 11 | ### Support Custom Ops 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /docs/installation/install_on_macos.md: -------------------------------------------------------------------------------- 1 | # Install on macOS 2 | 3 | Running DELTA training on a macOS is mostly the same as running on Linux, except some minor differences. 4 | 5 | ## Python environment 6 | 7 | You need to set up a working Python 3.6.x environment, either by using conda or manually build from source. 8 | You can follow the instructions in `manual_setup.md` to set up python and the required packages, e.g. Tensorflow. 9 | Note: `tensorflow-gpu` requires nvidia GPU, which might not be supported the latest macOS versions. You may want to use the `tensorflow` package (no -gpu postfix) instead. Some models that uses cuDNN implementations will not work without a CUDA GPU however. 10 | 11 | ## Other requirements 12 | 13 | ### Notes for Kaldi 14 | 15 | Building and running Kaldi on a macOS requires `wget`, `gawk` and other utilities which need to be installed via `Homebrew`. See `https://brew.sh` for details. 16 | ```shell 17 | /usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" 18 | brew install wget gawk grep 19 | ``` 20 | Also the `mmseg` package for Python2 is needed: 21 | ```shell 22 | pip2 install mmseg 23 | ``` 24 | 25 | Then follow `manual_setup.md` / `DELTA install` section to install 3rd-party dependencies. 26 | 27 | -------------------------------------------------------------------------------- /docs/installation/pick_installation.md: -------------------------------------------------------------------------------- 1 | # Pick a installation way for yourself 2 | 3 | ## Multiple installation ways 4 | 5 | Currently we support multiple ways to install `DELTA`. 
Please choose one 6 | installation for yourself according to your usage and needs. 7 | 8 | ## Install by pip 9 | 10 | For the **quick demo of the features** and **pure NLP users**, you can 11 | install the `nlp` version of `DELTA` by pip with a simple command: 12 | 13 | ```bash 14 | pip install delta-nlp 15 | ``` 16 | 17 | Check here for 18 | [the tutorial for usage of delta-nlp](../tutorials/training/text_class_pip_example.html). 19 | 20 | **Requirements**: You need `tensorflow==2.0.0` and `python==3.6` in 21 | MacOS or Linux. 22 | 23 | ## Install from the source code 24 | 25 | For users who need the **full functionality of DELTA** (including speech and 26 | nlp), you can clone our repository and install from the source code. 27 | 28 | Please follow the steps here: 29 | [Install from the source code](install_from_source.html) 30 | 31 | ## Use docker 32 | 33 | For users who are **capable of using docker**, you can pull our images 34 | directly. This may be the best choice for docker users. 35 | 36 | Please follow the steps here: 37 | [Installation using Docker](using_docker.html) 38 | 39 | -------------------------------------------------------------------------------- /docs/references.md: -------------------------------------------------------------------------------- 1 | # References 2 | 3 | - [Tensorflow](https://github.com/tensorflow/tensorflow) 4 | - [lingvo](https://github.com/tensorflow/lingvo) 5 | - [Tensor2Tensor](https://github.com/tensorflow/tensor2tensor) 6 | - [Kaldi](https://github.com/kaldi-asr/kaldi) 7 | - [ESPnet](https://github.com/espnet/espnet) 8 | - [models](https://github.com/tensorflow/models) 9 | - [YellowFin](https://github.com/JianGoForIt/YellowFin) 10 | - [yapf](https://github.com/google/yapf) 11 | - [python_speech_features](https://github.com/jameslyons/python_speech_features) 12 | - [abseil-cpp](https://github.com/abseil/abseil-cpp) 13 | -------------------------------------------------------------------------------- 
/docs/tutorials/deployment/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/docs/tutorials/deployment/.gitkeep -------------------------------------------------------------------------------- /docs/tutorials/training/data/emotion-specch-cls.md: -------------------------------------------------------------------------------- 1 | # Emotion 2 | 3 | using for conflict detection 4 | 5 | # data description 6 | 7 | - data/speech_cls_tas.py, data input pipeline, support `wav` and `feat` data 8 | data dir should like below: 9 | ```text 10 | data/ 11 | conflict/ 12 | id.wav 13 | id.TextGrid 14 | ... 15 | normal/ 16 | id.wav 17 | ... 18 | textgrid.segments 19 | ``` 20 | 21 | `textgrid.segments` generate by `util/gen_segments.sh` 22 | ```text 23 | /datasets/data/emotion/train/conflict/985e92a1636b73fec794fe6_20180907.wav (0.000000,35.611097) (79.816283,219.723946) (227.054141,300.340000) 24 | /datasets/data/emotion/train/conflict/4e110127b04c0d3d269b6df_20180907.wav (0.000000,18.883552) (127.398159,203.824681) (246.327001,268.460183) 25 | ``` 26 | 27 | # other 28 | 29 | - eval_pb.py, eval with graph.pb 30 | - eval_simple_save.py, eval with savedModel 31 | - gen_feat.py, generate feat for wav, used by `feat` mode 32 | - python_speech_features, fbank, mfcc, delta_detlas src 33 | - tf_speech_feature.py, extract feature in TF graph, used by `wav` mode 34 | -------------------------------------------------------------------------------- /docs/tutorials/training/data/kws-cls.md: -------------------------------------------------------------------------------- 1 | # KWS Data 2 | 3 | 训练数据包含三个文件: 4 | 5 | * feat 是二进制文件,存储的是特征数据 6 | * label 是二进制文件,存储的是标注结果 7 | * desc 是一个文本文件,包含每个语句所对应的起始帧和帧数 8 | 9 | ## DESC文件 10 | 文件的第一行是一个整数,表示句子的数量。 11 | 12 | 之后的每一行描述一个语句的信息:语句的key, 起始帧位置,帧数 13 | 14 | 例如 15 | 16 | | sentence key | reserved value | start position | length | 
17 | |:------------:|:--------------:|:-------------: |:------:| 18 | | 329110_18210989199_18600024111_1_3_1_driver.pcm.0016 | 0 | 20 | 219 | 19 | 20 | 语句间隔(前后,共40帧) 21 | 22 | 每条语句前后额外有20帧,分别重复第一帧、最后一帧 23 | 24 | ## FEAT文件 25 | 按帧保存特征数据。 26 | 27 | 例如,对于语音识别,每帧包含40维的fbank特征,数据类型为4字节的float。这样,每个语句就包含4 * 40 * 帧数 (字节)的数据。 28 | 29 | 文件头(共12字节): 30 | 31 | * nSample(帧数,4字节float) 32 | * sampleRate(100000,4字节float) 33 | * sampleSize(40维*4,2字节short) 34 | * sampleKind(7,样本类型) 35 | 36 | 37 | ## LABEL文件 38 | 39 | 按帧保存标记数据。 40 | 41 | 例如,对于音素或者半音素模型,每帧的标记为一个4字节的int。 42 | -------------------------------------------------------------------------------- /docs/tutorials/training/egs.md: -------------------------------------------------------------------------------- 1 | # Reproduce experiments - egs 2 | 3 | The `egs` director is data-oriented for `data prepration` and model `training`, `evaluation` and `infering`. 4 | 5 | Sppech and NLP task are orgnized by `egs`, e.g. ASR, speaker verfication, NLP. 6 | 7 | ## An Egs Example 8 | 9 | In this tutorial, we demonstrate an emotion recognition task with an open source dataset: `IEMOCAP`. All other task is same to this. 10 | 11 | A complete process contains following steps: 12 | 13 | - Download the `IEMOCAP` corpus. 14 | - Run egs/iemocap/emo/v1/run.sh script 15 | 16 | Before doing any these steps, please make sure that `delta` has been successfully installed. 17 | 18 | Every time you re-open a terminal, don't forget: 19 | 20 | ``` 21 | source env.sh 22 | ``` 23 | 24 | ### Prepare the Data Set 25 | 26 | Download `IEMOCAP` from https://sail.usc.edu/iemocap/index.html 27 | 28 | ### Run 29 | 30 | First: 31 | 32 | ``` 33 | pushd egs/iemocap/emo/v1 34 | ``` 35 | 36 | Then run `run.sh` script 37 | 38 | ``` 39 | ./run.sh --iemocap_root= 40 | ``` 41 | 42 | For other task, e.g. `ASR`, `Speaker`, the main script is `run_delta.sh`, but default main root is `run.sh`. 
43 | 44 | 45 | -------------------------------------------------------------------------------- /docs/tutorials/training/imags/log_spectrum_compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/docs/tutorials/training/imags/log_spectrum_compare.png -------------------------------------------------------------------------------- /docs/tutorials/training/imags/mfcc_compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/docs/tutorials/training/imags/mfcc_compare.png -------------------------------------------------------------------------------- /docs/tutorials/training/imags/pitch_compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/docs/tutorials/training/imags/pitch_compare.png -------------------------------------------------------------------------------- /docs/tutorials/training/imags/plp_compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/docs/tutorials/training/imags/plp_compare.png -------------------------------------------------------------------------------- /docs/tutorials/training/imags/speech_features.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/docs/tutorials/training/imags/speech_features.png -------------------------------------------------------------------------------- /docs/version.md: -------------------------------------------------------------------------------- 1 | # Version 2 | 3 | Version No. 
4 | 5 | ```text 6 | v{major}.{minor}.{stage}.{revision} 7 | ``` 8 | 9 | | stage | No. | description | e.g | 10 | | --- | --- | --- | --- | 11 | | Alpha | 0.5 | smoke test, estimate gains | v0.0.5.0 | 12 | | Beta | 0.7 | integration test | v0.0.7.2 | 13 | | RC1 | 0.8 | stress test | v0.0.8.1 | 14 | | RC2 | 0.9 | AB-test, evaluate gains | v0.0.9.0 | 15 | | Release | 1.0 | production | v0.1.0.0 | 16 | 17 | # Release Version 18 | 19 | Make sure all PRs under milestone `v0.3.2` are closed, then close the milestone. 20 | Using below command to generate relase note. 21 | 22 | `python tools/release_notes.py -c didi delta v0.3.2` 23 | -------------------------------------------------------------------------------- /dpl/gadapter/.gitignore: -------------------------------------------------------------------------------- 1 | saved_model/* 2 | -------------------------------------------------------------------------------- /dpl/gadapter/README.md: -------------------------------------------------------------------------------- 1 | # gadapter - convert graph 2 | 3 | ## saved model 4 | TF and TF-SERVING model 5 | 6 | Saving saved_model from `input_model` with version number from `model.yaml` 7 | 8 | ```bash 9 | saved_model/${version}/varables* 10 | saved_model/${version}/graph.pb 11 | ``` 12 | 13 | ## tfgraph 14 | Frozen graph 15 | 16 | ## tflite 17 | TF-Lite model 18 | 19 | ## tftrt 20 | TF-TRT model 21 | -------------------------------------------------------------------------------- /dpl/gadapter/saved_model/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/dpl/gadapter/saved_model/.gitkeep -------------------------------------------------------------------------------- /dpl/gadapter/tfgraph/.gitkeep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/dpl/gadapter/tfgraph/.gitkeep -------------------------------------------------------------------------------- /dpl/gadapter/tflite/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/dpl/gadapter/tflite/.gitkeep -------------------------------------------------------------------------------- /dpl/gadapter/tftrt/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/dpl/gadapter/tftrt/.gitkeep -------------------------------------------------------------------------------- /dpl/lib/custom_ops/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/dpl/lib/custom_ops/.gitkeep -------------------------------------------------------------------------------- /dpl/lib/deltann/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/dpl/lib/deltann/.gitkeep -------------------------------------------------------------------------------- /dpl/lib/tensorflow/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/dpl/lib/tensorflow/.gitkeep -------------------------------------------------------------------------------- /dpl/lib/tflite/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/dpl/lib/tflite/.gitkeep 
-------------------------------------------------------------------------------- /dpl/model/saved_model/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /dpl/model/saved_model/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/dpl/model/saved_model/.gitkeep -------------------------------------------------------------------------------- /egs/atis/README.md: -------------------------------------------------------------------------------- 1 | ## References 2 | 3 | Charles T. Hemphill, John J. Godfrey, and George R. Doddington. 1990. The ATIS spoken language systems pilot corpus. 4 | In Proceedings of the DARPA Speech and Natural Language Workshop. http://www.aclweb.org/anthology/ H90-1021. 5 | 6 | ## Download Links 7 | 8 | https://github.com/howl-anderson/ATIS_dataset/raw/master/data/raw_data/ms-cntk-atis 9 | 10 | ## Description 11 | 12 | the Air Travel Information System (ATIS) pilot corpus, 13 | a corpus designed to measure progress in Spoken Language Systems that include both a speech and natural language component. 14 | This pilot marks the first full-scale attempt to collect such a corpus and provides guidelines for future efforts. 
15 | 16 | 17 | ## Data scale introduction 18 | 19 | - Training size:4,978 20 | - Development size:- 21 | - Test size:893 22 | - Intents:26 23 | - Slots:129 24 | -------------------------------------------------------------------------------- /egs/atis/nlu-joint/v1/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | start_stage=0 4 | stop_stage=100 5 | data=./data/ 6 | 7 | if [ ${start_stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then 8 | # download data 9 | [ -d $data ] || mkdir -p $data || exit 1; 10 | wget -P $data https://github.com/howl-anderson/ATIS_dataset/raw/master/data/raw_data/ms-cntk-atis/atis.train.pkl || exit 1 11 | wget -P $data https://github.com/howl-anderson/ATIS_dataset/raw/master/data/raw_data/ms-cntk-atis/atis.test.pkl || exit 1 12 | fi 13 | 14 | if [ ${start_stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then 15 | # generate data with standard format 16 | python3 local/summary_data.py $data/train.txt $data/test.txt || exit 1 17 | fi 18 | -------------------------------------------------------------------------------- /egs/atis2/nlu_joint/v1/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | start_stage=0 4 | stop_stage=100 5 | data=./data/ 6 | 7 | if [ ${start_stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then 8 | # download data 9 | [ -d $data ] || mkdir -p $data || exit 1 10 | git clone https://github.com/yvchen/JointSLU.git JointSLU || exit 1 11 | mv JointSLU/data origin_data && rm -r -f JointSLU || exit 1 12 | fi 13 | 14 | if [ ${start_stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then 15 | # generate data with standard format 16 | python3 local/generate_standard_format.py origin_data/atis-2.train.w-intent.iob $data/train.txt || exit 1 17 | python3 local/generate_standard_format.py origin_data/atis-2.dev.w-intent.iob $data/dev.txt || exit 1 18 | python3 local/generate_standard_format.py origin_data/atis.test.w-intent.iob 
$data/test.txt || exit 1 19 | fi 20 | -------------------------------------------------------------------------------- /egs/cnn_dailymail/seq2seq/v1/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | start_stage=0 4 | stop_stage=1 5 | data=./data 6 | 7 | if [ ${start_stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then 8 | # download data 9 | [ -d $data ] || mkdir -p $data || exit 1; 10 | wget -P $data https://drive.google.com/uc?export=download&id=0BwmD_VLjROrfTHk4NFg2SndKcjQ || exit 1 11 | tar zxvf $data/cnn_stories.tgz -C $data || exit 1 12 | fi 13 | 14 | if [ ${start_stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then 15 | # split train, dev and test set 16 | git clone https://github.com/abisee/cnn-dailymail 17 | python local/make_datafiles.py $data $data || exit 1 18 | fi 19 | 20 | if [ ${start_stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then 21 | # scale data 22 | python utils/scale.py $data/train.cnndm.src $data/train.small.cnndm.src 0.05 || exit 1 23 | python utils/scale.py $data/train.cnndm.tgt $data/train.small.cnndm.tgt 0.05 || exit 1 24 | python utils/scale.py $data/val.cnndm.src $data/val.small.cnndm.src 0.05 || exit 1 25 | python utils/scale.py $data/val.cnndm.tgt $data/val.small.cnndm.tgt 0.05 || exit 1 26 | python utils/scale.py $data/test.cnndm.src $data/test.small.cnndm.src 0.05 || exit 1 27 | python utils/scale.py $data/test.cnndm.tgt $data/test.small.cnndm.tgt 0.05 || exit 1 28 | fi 29 | -------------------------------------------------------------------------------- /egs/cnn_dailymail/seq2seq/v1/utils: -------------------------------------------------------------------------------- 1 | ../../../utils/nlp 2 | -------------------------------------------------------------------------------- /egs/conll2003/README.md: -------------------------------------------------------------------------------- 1 | ## References 2 | 3 | https://www.aclweb.org/anthology/W03-0419 4 | 5 | ## links for download data 
6 | 7 | https://www.clips.uantwerpen.be/conll2003/ner/ 8 | 9 | ## Description 10 | 11 | The CoNLL-2003 named entity data consists of eight files covering two languages: English and German1. 12 | For each of the languages there is a training file, a development file, a test file and a large file with unannotated data. 13 | The learning methods were trained with the training data. The development data could be used for tuning the parameters 14 | of the learning methods 15 | 16 | ## Data scale introduction 17 | 18 | | English DataSet | Articles | Sentences | Tokens | 19 | |---|---|---|---| 20 | | Training set | 946 | 14,987 | 203,621 21 | | Development set | 216 | 3,466 | 51,362 | 22 | | Test set | 231 | 3,684 | 46,435 | 23 | 24 | 25 | | English DataSet | LOC | MISC | ORG |PER| 26 | |---|---|---|---|---| 27 | | Training set | 7140 | 3438 | 6321 | 6600| 28 | | Development set |1837 | 922 | 1341 |1842| 29 | | Test set | 1668 |702| 1661|1617| 30 | 31 | The more details about Germanl Dataset is shown in paper. 32 | 33 | -------------------------------------------------------------------------------- /egs/conll2003/pretrain/v1/README.md: -------------------------------------------------------------------------------- 1 | 1. You need to delete bilm/model.py 134-176 sentence after download bilm-tf project; 2 | 2. 
change 178 sentence as : 3 | # concatenate the layers 4 | lm_embeddings = tf.concat( 5 | [tf.expand_dims(t, axis=1) for t in layers], 6 | axis=1 7 | ) 8 | 9 | return { 10 | 'lm_embeddings': lm_embeddings, 11 | 'lengths': lm_graph.sequence_lengths, 12 | 'token_embeddings': lm_graph.embedding, 13 | 'mask': lm_graph.mask, 14 | } 15 | -------------------------------------------------------------------------------- /egs/conll2003/seq_label/v1/utils: -------------------------------------------------------------------------------- 1 | ../../../../utils -------------------------------------------------------------------------------- /egs/hkust/asr/v1/.gitignore: -------------------------------------------------------------------------------- 1 | data 2 | dump 3 | run.log* 4 | fbank 5 | exp 6 | -------------------------------------------------------------------------------- /egs/hkust/asr/v1/README.md: -------------------------------------------------------------------------------- 1 | # HKUST 2 | 3 | ## Results 4 | 5 | Results on `test` subsets. 6 | 7 | | Model | Token | Feat | CER% | LM | Decoder | Baseline | Reference | Config | NGPU | Front End | 8 | | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | 9 | | 5BLSTM | Charactor | 83 fbank+pitch | 34.89 | w/o | greedy | 38.67 | [Miao et al. (2016)](https://www.cs.cmu.edu/~ymiao/pub/icassp2016_ctc.pdf) | asr-ctc.yml | 2 | kaldi | 10 | | 5BLSTM | Charactor | 43 fbank+pitch | 36.49 | w/o | greedy | 38.67 | [Miao et al. 
(2016)](https://www.cs.cmu.edu/~ymiao/pub/icassp2016_ctc.pdf) | - | 2 | kaldi | 11 | | 2CNN+4BLSTM | Charactor | 83 fbank+pitch | 31.55 | w/o | greedy | - | - | CTCAsrModel | 2 | kaldi | 12 | -------------------------------------------------------------------------------- /egs/hkust/asr/v1/conf/cmu2pinyin: -------------------------------------------------------------------------------- 1 | AA A 2 | AE A 3 | AH A 4 | AO UO 5 | AW U 6 | AY AI 7 | B B 8 | CH CH 9 | D D 10 | DH S I 11 | EH AI 12 | ER E 13 | EY AI 14 | F F 15 | G G 16 | HH H 17 | IH I 18 | IY I 19 | JH ZH 20 | K K 21 | L L 22 | M M 23 | N N 24 | NG N 25 | OW UO 26 | OY UO 27 | P P 28 | R R 29 | S S 30 | SH SH 31 | T T 32 | TH S 33 | UH U 34 | UW U 35 | V W 36 | W W 37 | Y Y 38 | Z Z 39 | ZH X 40 | -------------------------------------------------------------------------------- /egs/hkust/asr/v1/conf/fbank.conf: -------------------------------------------------------------------------------- 1 | --sample-frequency=16000 2 | --num-mel-bins=40 3 | -------------------------------------------------------------------------------- /egs/hkust/asr/v1/conf/gpu.conf: -------------------------------------------------------------------------------- 1 | # Default configuration 2 | command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64* 3 | option mem=* -l mem_free=$0,ram_free=$0 4 | option mem=0 # Do not add anything to qsub_opts 5 | option num_threads=* -pe smp $0 6 | option num_threads=1 # Do not add anything to qsub_opts 7 | option max_jobs_run=* -tc $0 8 | default gpu=0 9 | option gpu=0 10 | option gpu=* -l 'hostname=b1[12345678]*|c*,gpu=$0' -q g.q -------------------------------------------------------------------------------- /egs/hkust/asr/v1/conf/pinyin2cmu: -------------------------------------------------------------------------------- 1 | A AA 2 | AI AY 3 | AN AE N 4 | ANG AE NG 5 | AO AW 6 | B B 7 | CH CH 8 | C T S 9 | D D 10 | E ER 11 | EI EY 12 | EN AH N 13 | ENG AH NG 14 | ER AA R 15 | F F 16 | G G 17 
| H HH 18 | IA IY AA 19 | IANG IY AE NG 20 | IAN IY AE N 21 | IAO IY AW 22 | IE IY EH 23 | I IY 24 | ING IY NG 25 | IN IY N 26 | IONG IY UH NG 27 | IU IY UH 28 | J J 29 | K K 30 | L L 31 | M M 32 | N N 33 | O AO 34 | ONG UH NG 35 | OU OW 36 | P P 37 | Q Q 38 | R R 39 | SH SH 40 | S S 41 | T T 42 | UAI UW AY 43 | UANG UW AE NG 44 | UAN UW AE N 45 | UA UW AA 46 | UI UW IY 47 | UN UW AH N 48 | UO UW AO 49 | U UW 50 | UE IY EH 51 | VE IY EH 52 | V IY UW 53 | VN IY N 54 | W W 55 | X X 56 | Y Y 57 | ZH JH 58 | Z Z 59 | -------------------------------------------------------------------------------- /egs/hkust/asr/v1/conf/pitch.conf: -------------------------------------------------------------------------------- 1 | --sample-frequency=16000 2 | -------------------------------------------------------------------------------- /egs/hkust/asr/v1/conf/queue.conf: -------------------------------------------------------------------------------- 1 | # Default configuration 2 | command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64* 3 | option mem=* -l mem_free=$0,ram_free=$0 4 | option mem=0 # Do not add anything to qsub_opts 5 | option num_threads=* -pe smp $0 6 | option num_threads=1 # Do not add anything to qsub_opts 7 | option max_jobs_run=* -tc $0 8 | default gpu=0 9 | option gpu=0 10 | option gpu=* -l gpu=$0 -q g.q 11 | -------------------------------------------------------------------------------- /egs/hkust/asr/v1/conf/slurm.conf: -------------------------------------------------------------------------------- 1 | # Default configuration 2 | command sbatch --export=PATH --ntasks-per-node=1 3 | option time=* --time $0 4 | option mem=* --mem-per-cpu $0 5 | option mem=0 # Do not add anything to qsub_opts 6 | option num_threads=* --cpus-per-task $0 --ntasks-per-node=1 7 | option num_threads=1 --cpus-per-task 1 --ntasks-per-node=1 # Do not add anything to qsub_opts 8 | default gpu=0 9 | option gpu=0 -p cpu 10 | option gpu=* -p gpu --gres=gpu:$0 11 | # note: the 
--max-jobs-run option is supported as a special case 12 | # by slurm.pl and you don't have to handle it in the config file. 13 | -------------------------------------------------------------------------------- /egs/hkust/asr/v1/local/create_oov_char_lexicon.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2016 Alibaba Robotics Corp. (Author: Xingyu Na) 3 | # 4 | # A script for char-based Chinese OOV lexicon generation. 5 | # 6 | # Input 1: char-based dictionary, example 7 | # CHAR1 ph1 ph2 8 | # CHAR2 ph3 9 | # CHAR3 ph2 ph4 10 | # 11 | # Input 2: OOV word list, example 12 | # WORD1 13 | # WORD2 14 | # WORD3 15 | # 16 | # where WORD1 is in the format of "CHAR1CHAR2". 17 | # 18 | # Output: OOV lexicon, in the format of normal lexicon 19 | 20 | if($#ARGV != 1) { 21 | print STDERR "usage: perl create_oov_char_lexicon.pl chardict oovwordlist > oovlex\n\n"; 22 | print STDERR "### chardict: a dict in which each line contains the pronunciation of one Chinese char\n"; 23 | print STDERR "### oovwordlist: OOV word list\n"; 24 | print STDERR "### oovlex: output OOV lexicon\n"; 25 | exit; 26 | } 27 | 28 | use utf8; 29 | my %prons; 30 | open(DICT, $ARGV[0]) || die("Can't open dict ".$ARGV[0]."\n"); 31 | binmode(DICT,":encoding(utf8)"); 32 | foreach () { 33 | chomp; @A = split(" ", $_); $prons{$A[0]} = $A[1]; 34 | } 35 | close DICT; 36 | 37 | open(WORDS, $ARGV[1]) || die("Can't open oov word list ".$ARGV[1]."\n"); 38 | binmode(WORDS,":encoding(utf8)"); 39 | while () { 40 | chomp; 41 | print $_; 42 | @A = split("", $_); 43 | foreach (@A) { 44 | print " $prons{$_}"; 45 | } 46 | print "\n"; 47 | } 48 | close WORDS; 49 | -------------------------------------------------------------------------------- /egs/hkust/asr/v1/local/hkust_format_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | 4 | if [ -f ./path.sh ]; then . 
./path.sh; fi 5 | 6 | mkdir -p data/train data/dev 7 | 8 | # Copy stuff into its final locations... 9 | 10 | for f in spk2utt utt2spk wav.scp text segments reco2file_and_channel; do 11 | cp data/local/train/$f data/train/$f || exit 1; 12 | done 13 | 14 | for f in spk2utt utt2spk wav.scp text segments reco2file_and_channel; do 15 | cp data/local/dev/$f data/dev/$f || exit 1; 16 | done 17 | 18 | echo hkust_format_data succeeded. 19 | -------------------------------------------------------------------------------- /egs/hkust/asr/v1/local/hkust_normalize.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use warnings; #sed replacement for -w perl parameter 3 | # Copyright Chao Weng 4 | 5 | # normalizations for hkust trascript 6 | # see the docs/trans-guidelines.pdf for details 7 | 8 | while () { 9 | @A = split(" ", $_); 10 | print "$A[0] "; 11 | for ($n = 1; $n < @A; $n++) { 12 | $a = $A[$n]; 13 | if (($a eq "{breath}")||($a eq "{cough}")||($a eq "{sneeze}") 14 | || ($a eq "{lipsmack}")) {print "[VOCALIZED-NOISE] "; next;} 15 | if (($a eq "{laugh}")) {print "[LAUGHTER] "; next;} 16 | if (($a eq "")) {print "[NOISE] "; next;} 17 | $tmp = $a; 18 | if ($tmp =~ /[^.,?+-]{0,}[.,?+-]+/) { $tmp =~ s:([^.,?+-]{0,})[.,?+-]+:$1:; } 19 | if ($tmp =~ /\~[A-Z]/) { $tmp =~ s:\~([A-Z]):$1:; } 20 | if ($tmp =~ /%\S/) { $tmp =~ s:%(\S):$1:; } 21 | if ($tmp =~ /[a-zA-Z]/) {$tmp=uc($tmp);} 22 | print "$tmp "; 23 | } 24 | print "\n"; 25 | } 26 | -------------------------------------------------------------------------------- /egs/hkust/asr/v1/local/hkust_segment.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | #coding:utf-8 3 | 4 | from __future__ import print_function 5 | import sys 6 | from mmseg import seg_txt 7 | for line in sys.stdin: 8 | blks = str.split(line) 9 | out_line = blks[0] 10 | for i in range(1, len(blks)): 11 | if blks[i] == "[VOCALIZED-NOISE]" 
or blks[i] == "[NOISE]" or blks[i] == "[LAUGHTER]": 12 | out_line += " " + blks[i] 13 | continue 14 | for j in seg_txt(blks[i]): 15 | out_line += " " + j 16 | print(out_line) 17 | -------------------------------------------------------------------------------- /egs/hkust/asr/v1/path.sh: -------------------------------------------------------------------------------- 1 | 2 | if [ -z $MAIN_ROOT ];then 3 | source ../../../../env.sh 4 | echo "source env.sh" 5 | fi 6 | 7 | export LC_ALL=C 8 | export PATH=$PATH:$PWD/utils/:$PWD 9 | -------------------------------------------------------------------------------- /egs/hkust/asr/v1/run_delta.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | stage=-1 4 | stop_stage=100 5 | config_file=asr-ctc.yml 6 | 7 | source path.sh 8 | source utils/parse_options.sh || exit 1; 9 | 10 | # Set bash to 'debug' mode, it will exit on : 11 | # -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands', 12 | set -e 13 | set -u 14 | set -o pipefail 15 | 16 | echo "Running from stage $stage ..." 17 | 18 | if [ $stage -le -1 ] && [ $stop_stage -ge -1 ]; then 19 | echo "Prepare data..." 20 | bash run.sh --stage -1 --stop_stage 2 21 | echo "Prepare data done." 22 | fi 23 | 24 | if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then 25 | echo "Train and Eval..." 26 | python3 -u $MAIN_ROOT/delta/main.py --config conf/$config_file --cmd train_and_eval 27 | echo "Train and Eval Done." 28 | fi 29 | 30 | if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then 31 | echo "Export Model..." 32 | python3 -u $MAIN_ROOT/delta/main.py --config conf/$config_file --cmd export_model 33 | echo "Export Model Done." 34 | fi 35 | 36 | if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then 37 | echo "Inspect Saved Model..." 38 | ckpt_dir=exp/asr-ctc/ckpt 39 | saved_model_dir=$ckpt_dir/export 40 | inspect_saved_model.sh $saved_model_dir 41 | echo "Inspect Saved Model Done." 
42 | fi 43 | 44 | if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then 45 | echo "Eval with Saved Model..." 46 | python3 $MAIN_ROOT/delta/serving/eval_asr_pb.py --config conf/$config_file --mode eval --gpu 0 47 | echo "Eval with Saved Model Done." 48 | fi 49 | -------------------------------------------------------------------------------- /egs/hkust/asr/v1/steps: -------------------------------------------------------------------------------- 1 | ../../../../tools/kaldi/egs/wsj/s5/steps -------------------------------------------------------------------------------- /egs/hkust/asr/v1/utils: -------------------------------------------------------------------------------- 1 | ../../../../tools/kaldi/egs/wsj/s5/utils/ -------------------------------------------------------------------------------- /egs/iemocap/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/egs/iemocap/README.md -------------------------------------------------------------------------------- /egs/iemocap/emo/v1/path.sh: -------------------------------------------------------------------------------- 1 | if [ -z $MAIN_ROOT ];then 2 | pushd ../../../../ && source env.sh && pushd 3 | echo "source env.sh" 4 | fi 5 | 6 | export PATH=$PATH:$PWD/utils/:$PWD 7 | -------------------------------------------------------------------------------- /egs/iemocap/emo/v1/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | . ./path.sh 4 | 5 | set -e 6 | 7 | stage=0 8 | stop_stage=100 9 | 10 | config_file=conf/emo-keras-blstm.yml 11 | iemocap_root=/export/corpus/iemocap # dataset root dir 12 | 13 | . utils/parse_options.sh # e.g. this parses the --stage option if supplied. 14 | 15 | if [[ $stage -le 0 && $stop_stage -ge 0 && ! -d "data" ]]; then 16 | echo "mkdir data" 17 | mkdir -p data 18 | fi 19 | 20 | #1. 
collect data 21 | if [[ $stage -le 1 && $stop_stage -ge 1 && ! -f "data/data_collected.pickle" ]]; then 22 | echo "collect data" 23 | python3 -u local/python/mocap_data_collect.py $iemocap_root || exit 1 24 | fi 25 | 26 | #2. to save `wav`, `text`, `label` to `dmpy dir 27 | if [[ $stage -le 2 && $stop_stage -ge 2 && ! -d "./data/dump" ]]; then 28 | echo "dump" 29 | mkdir -p data/dump 30 | python3 local/python/dump_data_from_pickle.py || exit 1 31 | fi 32 | 33 | echo "Using config $config_file" 34 | 35 | #3. make fbank 36 | if [[ $stage -le 3 && $stop_stage -ge 3 ]];then 37 | echo "compute feature" 38 | python3 -u $MAIN_ROOT/delta/main.py --cmd gen_feat --config $config_file || exit 1 39 | fi 40 | 41 | #4. make cmvn 42 | if [[ $stage -le 4 && $stop_stage -ge 4 ]]; then 43 | echo "compute cmvn" 44 | python3 -u $MAIN_ROOT/delta/main.py --cmd gen_cmvn --config $config_file || exit 1 45 | fi 46 | 47 | #5. train and eval 48 | if [[ $stage -le 5 && $stop_stage -ge 5 ]]; then 49 | echo "taining and evaluation" 50 | python3 -u $MAIN_ROOT/delta/main.py --cmd train_and_eval --config $config_file || exit 1 51 | fi 52 | -------------------------------------------------------------------------------- /egs/iemocap/emo/v1/utils: -------------------------------------------------------------------------------- 1 | ../../../../utils/ -------------------------------------------------------------------------------- /egs/mini_an4/README.md: -------------------------------------------------------------------------------- 1 | # [AN4](http://www.speech.cs.cmu.edu/databases/an4) 2 | 3 | This database, also known as AN4 and as the Alphanumeric database, was recorded internally at CMU circa 1991. It has been used in several theses over the years. 
4 | -------------------------------------------------------------------------------- /egs/mini_an4/asr/v1/.gitignore: -------------------------------------------------------------------------------- 1 | data 2 | downloads 3 | dump 4 | exp 5 | fbank 6 | -------------------------------------------------------------------------------- /egs/mini_an4/asr/v1/conf/decode.yaml: -------------------------------------------------------------------------------- 1 | # decoding parameter 2 | beam-size: 2 3 | penalty: 0.0 4 | maxlenratio: 0.0 5 | minlenratio: 0.0 6 | ctc-weight: 0.5 7 | lm-weight: 1.0 8 | -------------------------------------------------------------------------------- /egs/mini_an4/asr/v1/conf/fbank.conf: -------------------------------------------------------------------------------- 1 | --sample-frequency=16000 2 | --num-mel-bins=80 3 | -------------------------------------------------------------------------------- /egs/mini_an4/asr/v1/conf/gpu.conf: -------------------------------------------------------------------------------- 1 | # Default configuration 2 | command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64* 3 | option mem=* -l mem_free=$0,ram_free=$0 4 | option mem=0 # Do not add anything to qsub_opts 5 | option num_threads=* -pe smp $0 6 | option num_threads=1 # Do not add anything to qsub_opts 7 | option max_jobs_run=* -tc $0 8 | default gpu=0 9 | option gpu=0 10 | option gpu=* -l 'hostname=b1[12345678]*|c*,gpu=$0' -q g.q -------------------------------------------------------------------------------- /egs/mini_an4/asr/v1/conf/lm.yaml: -------------------------------------------------------------------------------- 1 | layer: 1 # 2 for character LMs 2 | unit: 10 # 650 for character LMs 3 | opt: sgd # adam for character LMs 4 | sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 5 | batchsize: 2 # 1024 for character LMs 6 | epoch: 3 # number of epochs 7 | patience: 2 8 | 
maxlen: 40 # 150 for character LMs 9 | -------------------------------------------------------------------------------- /egs/mini_an4/asr/v1/conf/pitch.conf: -------------------------------------------------------------------------------- 1 | --sample-frequency=16000 2 | -------------------------------------------------------------------------------- /egs/mini_an4/asr/v1/conf/preprocess.yaml: -------------------------------------------------------------------------------- 1 | process: 2 | # these three processes are a.k.a. SpecAugument 3 | - type: "time_warp" 4 | max_time_warp: 5 5 | inplace: true 6 | mode: "PIL" 7 | - type: "freq_mask" 8 | F: 30 9 | n_mask: 2 10 | inplace: true 11 | replace_with_zero: false 12 | - type: "time_mask" 13 | T: 40 14 | n_mask: 2 15 | inplace: true 16 | replace_with_zero: false 17 | -------------------------------------------------------------------------------- /egs/mini_an4/asr/v1/conf/queue.conf: -------------------------------------------------------------------------------- 1 | # Default configuration 2 | command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64* 3 | option mem=* -l mem_free=$0,ram_free=$0 4 | option mem=0 # Do not add anything to qsub_opts 5 | option num_threads=* -pe smp $0 6 | option num_threads=1 # Do not add anything to qsub_opts 7 | option max_jobs_run=* -tc $0 8 | default gpu=0 9 | option gpu=0 10 | option gpu=* -l gpu=$0 -q g.q 11 | -------------------------------------------------------------------------------- /egs/mini_an4/asr/v1/conf/slurm.conf: -------------------------------------------------------------------------------- 1 | # Default configuration 2 | command sbatch --export=PATH --ntasks-per-node=1 3 | option time=* --time $0 4 | option mem=* --mem-per-cpu $0 5 | option mem=0 # Do not add anything to qsub_opts 6 | option num_threads=* --cpus-per-task $0 --ntasks-per-node=1 7 | option num_threads=1 --cpus-per-task 1 --ntasks-per-node=1 # Do not add anything to qsub_opts 8 | default gpu=0 9 | option 
gpu=0 -p cpu 10 | option gpu=* -p gpu --gres=gpu:$0 11 | # note: the --max-jobs-run option is supported as a special case 12 | # by slurm.pl and you don't have to handle it in the config file. 13 | -------------------------------------------------------------------------------- /egs/mini_an4/asr/v1/conf/train.yaml: -------------------------------------------------------------------------------- 1 | # minibatch related 2 | batch-size: 2 3 | maxlen-in: 800 # if input length > maxlen_in, batchsize is automatically reduced 4 | maxlen-out: 150 # if output length > maxlen_out, batchsize is automatically reduced 5 | # optimization related 6 | sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 7 | opt: adadelta 8 | epochs: 3 9 | patience: 2 10 | 11 | # scheduled sampling option 12 | sampling-probability: 0.0 13 | 14 | # encoder related 15 | etype: blstmp # encoder architecture type 16 | elayers: 2 17 | eunits: 32 18 | eprojs: 32 19 | subsample: "1_2_2_1_1" # skip every n frame from input to nth layers 20 | # decoder related 21 | dlayers: 1 22 | dunits: 30 23 | # attention related 24 | atype: location 25 | adim: 32 26 | aconv-chans: 4 27 | aconv-filts: 3 28 | 29 | # hybrid CTC/attention 30 | mtlalpha: 0.5 31 | -------------------------------------------------------------------------------- /egs/mini_an4/asr/v1/downloads.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/egs/mini_an4/asr/v1/downloads.tar.gz -------------------------------------------------------------------------------- /egs/mini_an4/asr/v1/dutils: -------------------------------------------------------------------------------- 1 | ../../../../utils -------------------------------------------------------------------------------- /egs/mini_an4/asr/v1/path.sh: 
-------------------------------------------------------------------------------- 1 | if [ -z $MAIN_ROOT ];then 2 | source ../../../../env.sh 3 | echo "source env.sh" 4 | fi 5 | 6 | export LC_ALL=C 7 | # https://github.com/espnet/espnet/pull/1090 8 | export PYTHONIOENCODING=UTF-8 9 | 10 | export PATH=$MAIN_ROOT/utils/:$MAIN_ROOT/utils/speech:$PWD:$PWD/utils:$PATH 11 | -------------------------------------------------------------------------------- /egs/mini_an4/asr/v1/run_delta.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | stage=-1 4 | stop_stage=100 5 | config_file=asr-ctc.yml 6 | 7 | source path.sh 8 | source $MAIN_ROOT/utils/parse_options.sh || exit 1; 9 | 10 | # Set bash to 'debug' mode, it will exit on : 11 | # -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands', 12 | set -e 13 | set -u 14 | set -o pipefail 15 | 16 | export CUDA_VISIBLE_DEVICES='' 17 | 18 | echo "Running from stage $stage ..." 19 | 20 | if [ $stage -le -1 ] && [ $stop_stage -ge -1 ]; then 21 | echo "Prepare data..." 22 | bash run.sh --stage -1 --stop_stage 2 23 | echo "Prepare data done." 24 | fi 25 | 26 | if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then 27 | echo "Train and Eval..." 28 | python3 -u $MAIN_ROOT/delta/main.py --config conf/$config_file --cmd train_and_eval 29 | echo "Train and Eval Done." 30 | fi 31 | 32 | 33 | if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then 34 | echo "Train..." 35 | python3 -u $MAIN_ROOT/delta/main.py --config conf/$config_file --cmd train 36 | echo "Train Done." 37 | fi 38 | 39 | if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then 40 | echo "Eval..." 41 | python3 -u $MAIN_ROOT/delta/main.py --config conf/$config_file --cmd eval 42 | echo "Eval Done." 43 | fi 44 | 45 | if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then 46 | echo "Infer..." 47 | python3 -u $MAIN_ROOT/delta/main.py --config conf/$config_file --cmd infer 48 | echo "Infer Done." 
49 | fi 50 | -------------------------------------------------------------------------------- /egs/mini_an4/asr/v1/speech: -------------------------------------------------------------------------------- 1 | ../../../../utils/speech/ -------------------------------------------------------------------------------- /egs/mini_an4/asr/v1/steps: -------------------------------------------------------------------------------- 1 | ../../../../tools/kaldi/egs/wsj/s5/steps -------------------------------------------------------------------------------- /egs/mini_an4/asr/v1/utils: -------------------------------------------------------------------------------- 1 | ../../../../tools/kaldi/egs/wsj/s5/utils -------------------------------------------------------------------------------- /egs/mock_text_cls_data/text_cls/v1/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | start_stage=0 4 | stop_stage=100 5 | data=./data/ 6 | 7 | if [ ${start_stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then 8 | # download data 9 | [ -d $data ] || mkdir -p $data || exit 1; 10 | python3 local/generate_mock_data.py data/train.txt data/eval.txt data/test.txt data/text_vocab.txt || exit 1 11 | fi 12 | -------------------------------------------------------------------------------- /egs/mock_text_cls_data/text_cls/v1/utils: -------------------------------------------------------------------------------- 1 | ../../../../utils -------------------------------------------------------------------------------- /egs/mock_text_match_data/text_match/v1/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | start_stage=0 4 | stop_stage=100 5 | data=./data/ 6 | 7 | if [ ${start_stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then 8 | # download data 9 | [ -d $data ] || mkdir -p $data || exit 1; 10 | python3 local/generate_mock_data.py data/train.txt data/dev.txt data/test.txt data/text_vocab.txt || 
exit 1 11 | fi 12 | -------------------------------------------------------------------------------- /egs/mock_text_match_data/text_match/v1/utils: -------------------------------------------------------------------------------- 1 | ../../../../utils -------------------------------------------------------------------------------- /egs/mock_text_nlu_joint_data/nlu-joint/v1/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | start_stage=0 4 | stop_stage=100 5 | data=./data/ 6 | 7 | if [ ${start_stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then 8 | # download data 9 | [ -d $data ] || mkdir -p $data || exit 1; 10 | python3 local/generate_mock_data.py data/train.txt data/dev.txt data/test.txt data/text_vocab.txt || exit 1 11 | fi 12 | -------------------------------------------------------------------------------- /egs/mock_text_nlu_joint_data/nlu-joint/v1/utils: -------------------------------------------------------------------------------- 1 | ../../../../utils -------------------------------------------------------------------------------- /egs/mock_text_seq2seq_data/seq2seq/v1/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | start_stage=0 4 | stop_stage=100 5 | data=./data/ 6 | 7 | if [ ${start_stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then 8 | # download data 9 | [ -d $data ] || mkdir -p $data || exit 1; 10 | python local/generate_mock_data.py data/train.txt data/dev.txt data/test.txt || exit 1 11 | fi 12 | -------------------------------------------------------------------------------- /egs/mock_text_seq2seq_data/seq2seq/v1/utils: -------------------------------------------------------------------------------- 1 | ../../../utils -------------------------------------------------------------------------------- /egs/mock_text_seq_label_data/seq-label/v1/run.sh: -------------------------------------------------------------------------------- 1 | 
#!/bin/bash 2 | 3 | start_stage=0 4 | stop_stage=100 5 | data=./data/ 6 | 7 | if [ ${start_stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then 8 | # download data 9 | [ -d $data ] || mkdir -p $data || exit 1; 10 | python3 local/generate_mock_data.py data/train.txt data/dev.txt data/test.txt data/text_vocab.txt data/label_vocab.txt || exit 1 11 | fi 12 | -------------------------------------------------------------------------------- /egs/mock_text_seq_label_data/seq-label/v1/utils: -------------------------------------------------------------------------------- 1 | ../../../../utils -------------------------------------------------------------------------------- /egs/msra_ner/README.md: -------------------------------------------------------------------------------- 1 | ## References 2 | 3 | https://github.com/Determined22/zh-NER-TF 4 | 5 | ## Description 6 | 7 | MSRA datasets are in the news domain about NER. 8 | 9 | ## Download Links 10 | 11 | https://github.com/Determined22/zh-NER-TF/tree/master/data_path 12 | 13 | ## Data scale introduction 14 | 15 | | Entity Type | PER | LOC | ORG |Sentence| 16 | |---|---|---|---|---| 17 | | Training set | 17,615 | 36,517 | 20,571 | 46,364 18 | | Development set | - | - | - | - | 19 | | Test set | 1,973 | 2,877 | 1,311|4365| 20 | -------------------------------------------------------------------------------- /egs/msra_ner/seq_label/v1/utils: -------------------------------------------------------------------------------- 1 | ../../../../utils -------------------------------------------------------------------------------- /egs/quora_qp/README.md: -------------------------------------------------------------------------------- 1 | ## References 2 | 3 | Shankar Iyar, Nikhil Dandekar, and Kornél Csernai. “First Quora Dataset Release: Question Pairs,” 24 January 2016. 4 | Retrieved at https://data.quora.com/First-Quora-Dataset-Release-Question-Pairs on 31 January 2017. 
Data fields 5 | 6 | ## Download Links 7 | 8 | http://qim.ec.quoracdn.net/quora_duplicate_questions.tsv 9 | 10 | ## Description 11 | 12 | Quora is a place to gain and share knowledge—about anything. It’s a platform to ask questions and connect with people who contribute unique insights and quality answers. 13 | This empowers people to learn from each other and to better understand the world.Kagglers are challenged to tackle this natural 14 | language processing problem by applying advanced techniques to classify whether question pairs are duplicates or not. Doing so will make it easier to find high quality 15 | answers to questions resulting in an improved experience for Quora writers, seekers, and readers. 16 | source is available: https://data.quora.com/First-Quora-Dataset-Release-Question-Pairs 17 | 18 | ## Data scale introduction 19 | 20 | - Training size:327,469 21 | - Development size:36,387 22 | - Test size:40,431 23 | -------------------------------------------------------------------------------- /egs/quora_qp/match/v1/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | start_stage=0 4 | stop_stage=100 5 | data=./data/ 6 | _url ="https://firebasestorage.googleapis.com/v0/b/mtl-sentence" \ 7 | "-representations.appspot.com/o/data%2FQQP.zip?alt=media&" \ 8 | "token=700c6acf-160d-4d89-81d1-de4191d02cb5" 9 | 10 | if [ ${start_stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then 11 | # download quora_qp data path data/QQP/[train.tsv,test.tsv,dev.tsv] 12 | #to do 13 | python3 local/load_data.py _url $data ||exit 1 14 | 15 | fi 16 | 17 | if [ ${start_stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then 18 | # generate_standard_format 19 | #label sentence1 sentence2 20 | python3 local/generate_standard_format.py $data/QQP/original/quora_duplicate_questions.tsv $data/quora_stand.txt || exit 1 21 | fi 22 | 23 | if [ ${start_stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then 24 | # split dev and train 25 | python3 
utils/nlp/split_train_dev.py $data/quora_stand.txt $data/train_dev.txt $data/test.txt 0.1 || exit 1 26 | python3 utils/nlp/split_train_dev.py $data/train_dev.txt $data/train.txt $data/dev.txt 0.1 || exit 1 27 | rm $data/train_dev.txt 28 | fi 29 | 30 | if [ ${start_stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then 31 | # scale data 32 | python3 utils/nlp/scale.py $data/train.txt $data/train_small.txt 0.01 || exit 1 33 | python3 utils/nlp/scale.py $data/dev.txt $data/dev_small.txt 0.01 || exit 1 34 | python3 utils/nlp/scale.py $data/test.txt $data/test_small.txt 0.01 || exit 1 35 | fi 36 | -------------------------------------------------------------------------------- /egs/quora_qp/match/v1/utils: -------------------------------------------------------------------------------- 1 | ../../../../utils -------------------------------------------------------------------------------- /egs/snli/README.md: -------------------------------------------------------------------------------- 1 | ## References 2 | 3 | Stanford Natural Language Inference (SNLI) corpus is released in A large annotated corpus for learning natural language inference 4 | 5 | Available: https://sigann.github.io/LAW-XI-2017/papers/LAW01.pdf 6 | 7 | ## Download Links 8 | 9 | https://nlp.stanford.edu/projects/snli/snli_1.0.zip 10 | 11 | ## Description 12 | 13 | Stanford Natural Language Inference corpus is a new, freely available collection of labeled sentence pairs, written by humans doing a novel grounded task based on image captioning. At 570K pairs, it is two orders of magnitude larger than all other resources of its type. This increase in scale allows lexicalized classifiers to outperform some sophisticated existing entailment models, and it allows a neural network-based model to perform competitively on natural language inference benchmarks for the first time.
14 | 15 | ## Data scale introduction 16 | 17 | - Training pairs:550,152 18 | - Development pairs:10,000 19 | - Test pairs:10,000 20 | -------------------------------------------------------------------------------- /egs/snli/match/v1/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | start_stage=0 4 | stop_stage=100 5 | data=./data/ 6 | url=https://nlp.stanford.edu/projects/snli/snli_1.0.zip 7 | 8 | if [ ${start_stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then 9 | # download data 10 | [ -d $data ] || mkdir -p $data || exit 1; 11 | wget -P $data $url || exit 1 12 | unzip $data/snli_1.0.zip -d $data || exit 1 13 | 14 | fi 15 | 16 | if [ ${start_stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then 17 | # generate_standard_format 18 | #label sentence1 sentence2 19 | python3 local/generate_standard_format.py $data/snli_1.0/snli_1.0_dev.jsonl $data/dev.txt || exit 1 20 | python3 local/generate_standard_format.py $data/snli_1.0/snli_1.0_test.jsonl $data/test.txt || exit 1 21 | python3 local/generate_standard_format.py $data/snli_1.0/snli_1.0_train.jsonl $data/train.txt || exit 1 22 | fi 23 | 24 | if [ ${start_stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then 25 | # scale data 26 | python3 utils/nlp/scale.py $data/train.txt $data/train_small.txt 0.05 || exit 1 27 | python3 utils/nlp/scale.py $data/dev.txt $data/dev_small.txt 0.05 || exit 1 28 | python3 utils/nlp/scale.py $data/test.txt $data/test_small.txt 0.05 || exit 1 29 | fi 30 | -------------------------------------------------------------------------------- /egs/snli/match/v1/utils: -------------------------------------------------------------------------------- 1 | ../../../../utils -------------------------------------------------------------------------------- /egs/sre16/v1/README.md: -------------------------------------------------------------------------------- 1 | # Tensorflow x-vector 2 | 3 | The recipe uses the following data for system development. 
This is in 4 | addition to the NIST SRE 2016 dataset used for evaluation (see ../README.txt). 5 | 6 | Corpus LDC Catalog No. 7 | SWBD2 Phase 1 LDC98S75 8 | SWBD2 Phase 2 LDC99S79 9 | SWBD2 Phase 3 LDC2002S06 10 | SWBD Cellular 1 LDC2001S13 11 | SWBD Cellular 2 LDC2004S07 12 | SRE2004 LDC2006S44 13 | SRE2005 Train LDC2011S01 14 | SRE2005 Test LDC2011S04 15 | SRE2006 Train LDC2011S09 16 | SRE2006 Test 1 LDC2011S10 17 | SRE2006 Test 2 LDC2012S01 18 | SRE2008 Train LDC2011S05 19 | SRE2008 Test LDC2011S08 20 | SRE2010 Eval LDC2017S06 21 | Mixer 6 LDC2013S03 22 | 23 | The following datasets are used in data augmentation. 24 | 25 | MUSAN http://www.openslr.org/17 26 | RIR_NOISES http://www.openslr.org/28 27 | 28 | ## refs 29 | 30 | * [x-vector-kaldi-tf](https://github.com/hsn-zeinali/x-vector-kaldi-tf/) 31 | * [kaldi](https://github.com/kaldi-asr/kaldi/tree/master/egs/sre16/v2) 32 | * [espnet](https://github.com/espnet/espnet) 33 | 34 | -------------------------------------------------------------------------------- /egs/sre16/v1/conf/mfcc.conf: -------------------------------------------------------------------------------- 1 | --sample-frequency=8000 2 | --frame-length=25 # the default is 25 3 | --low-freq=20 # the default. 4 | --high-freq=3700 # the default is zero meaning use the Nyquist (4k in this case). 5 | --num-ceps=23 # higher than the default which is 12. 6 | --snip-edges=false 7 | -------------------------------------------------------------------------------- /egs/sre16/v1/conf/vad.conf: -------------------------------------------------------------------------------- 1 | --vad-energy-threshold=5.5 2 | --vad-energy-mean-scale=0.5 3 | --vad-proportion-threshold=0.12 4 | --vad-frames-context=2 5 | -------------------------------------------------------------------------------- /egs/sre16/v1/local/make_musan.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2015 David Snyder 3 | # Apache 2.0. 
4 | # 5 | # This script, called by ../run.sh, creates the MUSAN 6 | # data directory. The required dataset is freely available at 7 | # http://www.openslr.org/17/ 8 | 9 | set -e 10 | in_dir=$1 11 | data_dir=$2 12 | use_vocals='Y' 13 | 14 | mkdir -p local/musan.tmp 15 | 16 | echo "Preparing ${data_dir}/musan..." 17 | mkdir -p ${data_dir}/musan 18 | local/make_musan.py ${in_dir} ${data_dir}/musan ${use_vocals} 19 | 20 | utils/fix_data_dir.sh ${data_dir}/musan 21 | 22 | grep "music" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_music 23 | grep "speech" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_speech 24 | grep "noise" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_noise 25 | utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_music \ 26 | ${data_dir}/musan ${data_dir}/musan_music 27 | utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_speech \ 28 | ${data_dir}/musan ${data_dir}/musan_speech 29 | utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_noise \ 30 | ${data_dir}/musan ${data_dir}/musan_noise 31 | 32 | utils/fix_data_dir.sh ${data_dir}/musan_music 33 | utils/fix_data_dir.sh ${data_dir}/musan_speech 34 | utils/fix_data_dir.sh ${data_dir}/musan_noise 35 | 36 | rm -rf local/musan.tmp 37 | 38 | -------------------------------------------------------------------------------- /egs/sre16/v1/local/make_sre.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2017 David Snyder 3 | # Apache 2.0. 4 | # 5 | # See README.txt for more info on data required. 
6 | 7 | set -e 8 | 9 | data_root=$1 10 | data_dir=$2 11 | 12 | wget -P data/local/ http://www.openslr.org/resources/15/speaker_list.tgz 13 | tar -C data/local/ -xvf data/local/speaker_list.tgz 14 | sre_ref=data/local/speaker_list 15 | 16 | local/make_sre.pl $data_root/LDC2006S44/ \ 17 | 04 $sre_ref $data_dir/sre2004 18 | 19 | local/make_sre.pl $data_root/LDC2011S01 \ 20 | 05 $sre_ref $data_dir/sre2005_train 21 | 22 | local/make_sre.pl $data_root/LDC2011S04 \ 23 | 05 $sre_ref $data_dir/sre2005_test 24 | 25 | local/make_sre.pl $data_root/LDC2011S09 \ 26 | 06 $sre_ref $data_dir/sre2006_train 27 | 28 | local/make_sre.pl $data_root/LDC2011S10 \ 29 | 06 $sre_ref $data_dir/sre2006_test_1 30 | 31 | local/make_sre.pl $data_root/LDC2012S01 \ 32 | 06 $sre_ref $data_dir/sre2006_test_2 33 | 34 | rm data/local/speaker_list.* 35 | -------------------------------------------------------------------------------- /egs/sre16/v1/local/make_sre_BUT.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2017 David Snyder 3 | # Apache 2.0. 4 | # 5 | # See README.txt for more info on data required. 
6 | 7 | set -e 8 | 9 | data_root=$1 10 | data_dir=$2 11 | 12 | wget -P data/local/ http://www.openslr.org/resources/15/speaker_list.tgz 13 | tar -C data/local/ -xvf data/local/speaker_list.tgz 14 | sre_ref=data/local/speaker_list 15 | 16 | local/make_sre.pl $data_root/NIST/sre04 \ 17 | 04 $sre_ref $data_dir/sre2004 18 | 19 | local/make_sre.pl $data_root/NIST/sre05/r101_1_1/train \ 20 | 05 $sre_ref $data_dir/sre2005_train 21 | 22 | local/make_sre.pl $data_root/NIST/sre05/r101_1_1/test \ 23 | 05 $sre_ref $data_dir/sre2005_test 24 | 25 | local/make_sre.pl $data_root/NIST/sre06/r108_1_1/train \ 26 | 06 $sre_ref $data_dir/sre2006_train 27 | 28 | local/make_sre.pl $data_root/NIST/sre06/r108_1_1/test \ 29 | 06 $sre_ref $data_dir/sre2006_test_1 30 | 31 | #local/make_sre.pl $data_root/NIST/sre06/r108_1_1/test \ 32 | # 06 $sre_ref $data_dir/sre2006_test_2 33 | 34 | rm data/local/speaker_list.* 35 | -------------------------------------------------------------------------------- /egs/sre16/v1/path.sh: -------------------------------------------------------------------------------- 1 | if [ -z $KALDI_ROOT ];then 2 | source ../../../env.sh 3 | echo "source env.sh" 4 | fi 5 | 6 | export PATH=$PWD/utils/:$PWD:$PATH 7 | export LC_ALL=C 8 | -------------------------------------------------------------------------------- /egs/sre16/v1/sid: -------------------------------------------------------------------------------- 1 | ../../../tools/kaldi/egs/sre08/v1/sid/ -------------------------------------------------------------------------------- /egs/sre16/v1/steps: -------------------------------------------------------------------------------- 1 | ../../../tools/kaldi/egs/wsj/s5/steps/ -------------------------------------------------------------------------------- /egs/sre16/v1/utils: -------------------------------------------------------------------------------- 1 | ../../../tools/kaldi/egs/wsj/s5/utils/ 
-------------------------------------------------------------------------------- /egs/trec/README.md: -------------------------------------------------------------------------------- 1 | ## References 2 | 3 | Xin Li, Dan Roth, Learning Question Classifiers. COLING'02, Aug., 2002. 4 | 5 | ## Description 6 | 7 | This data collection contains all the data used in our learning question classification experiments(Xin Li, Dan Roth, Learning Question Classifiers. COLING'02, Aug., 2002.), 8 | which has question class definitions, the training and testing question sets, examples of preprocessing the questions, feature definition scripts and examples of semantically related word features. 9 | This work has been done by Xin Li and Dan Roth and supported by Research supported by (NSF grants IIS-9801638 and ITR IIS-0085836 and an ONR MURI Award.) . 10 | 11 | ## Download Links 12 | 13 | https://github.com/thtrieu/qclass_dl/tree/master/data 14 | 15 | ## Data scale introduction 16 | 17 | - Training size:5452 18 | - Development size:- 19 | - Test size:500 20 | 21 | -------------------------------------------------------------------------------- /egs/trec/text_cls/v1/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | start_stage=0 4 | stop_stage=100 5 | data=./data/ 6 | 7 | if [ ${start_stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then 8 | # download data 9 | [ -d $data ] || mkdir -p $data || exit 1; 10 | wget -P $data https://raw.githubusercontent.com/thtrieu/qclass_dl/master/data/train || exit 1 11 | wget -P $data https://raw.githubusercontent.com/thtrieu/qclass_dl/master/data/test || exit 1 12 | fi 13 | 14 | if [ ${start_stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then 15 | # change data format 16 | [ -d $data ] || mkdir -p $data || exit 1; 17 | python3 local/change_data_format.py $data/train $data/train.txt|| exit 1 18 | python3 local/change_data_format.py $data/test $data/test.txt || exit 1 19 | fi 20 | 
-------------------------------------------------------------------------------- /egs/voxceleb/README.md: -------------------------------------------------------------------------------- 1 | ../../tools/kaldi/egs/voxceleb/README.txt -------------------------------------------------------------------------------- /egs/voxceleb/spk/v1/.gitignore: -------------------------------------------------------------------------------- 1 | conf/*.yml 2 | data 3 | exp 4 | mfcc 5 | -------------------------------------------------------------------------------- /egs/voxceleb/spk/v1/conf/mfcc.conf: -------------------------------------------------------------------------------- 1 | --sample-frequency=16000 2 | --frame-length=25 # the default is 25 3 | --low-freq=20 # the default. 4 | --high-freq=7600 # the default is zero meaning use the Nyquist (8k in this case). 5 | --num-mel-bins=30 6 | --num-ceps=30 7 | --snip-edges=false 8 | -------------------------------------------------------------------------------- /egs/voxceleb/spk/v1/conf/vad.conf: -------------------------------------------------------------------------------- 1 | --vad-energy-threshold=5.5 2 | --vad-energy-mean-scale=0.5 3 | --vad-proportion-threshold=0.12 4 | --vad-frames-context=2 5 | -------------------------------------------------------------------------------- /egs/voxceleb/spk/v1/local: -------------------------------------------------------------------------------- 1 | ../../../../tools/kaldi/egs/voxceleb/v1/local/ -------------------------------------------------------------------------------- /egs/voxceleb/spk/v1/path.sh: -------------------------------------------------------------------------------- 1 | if [ -z $KALDI_ROOT ];then 2 | pushd ../../../.. 
&& source env.sh && popd 3 | echo "source env.sh" 4 | fi 5 | 6 | export PATH=$PWD/utils/:$PWD:$PATH 7 | export LC_ALL=C 8 | -------------------------------------------------------------------------------- /egs/voxceleb/spk/v1/sid: -------------------------------------------------------------------------------- 1 | ../../../../tools/kaldi/egs/sre08/v1/sid/ -------------------------------------------------------------------------------- /egs/voxceleb/spk/v1/steps: -------------------------------------------------------------------------------- 1 | ../../../../tools/kaldi/egs/wsj/s5/steps/ -------------------------------------------------------------------------------- /egs/voxceleb/spk/v1/utils: -------------------------------------------------------------------------------- 1 | ../../../../tools/kaldi/egs/wsj/s5/utils/ -------------------------------------------------------------------------------- /egs/yahoo_answer/README.md: -------------------------------------------------------------------------------- 1 | ## Description 2 | Yahoo answers are obtained from (Zhang et al., 2015). This is a topic classification task with 10 classes: Society & Culture, 3 | Science & Mathematics, Health, Education & Reference, Computers & Internet, Sports, Business & Finance, Entertainment & Music, 4 | Family & Relationships and Politics & Government. The document we use includes question titles, question contexts and best answers. 
5 | 6 | ## Download links 7 | 8 | https://s3.amazonaws.com/fast-ai-nlp/yahoo_answers_csv.tgz 9 | 10 | ## Data scale introduction 11 | 12 | we split the raw data into training set, development dataset and test dataset 13 | - Training dataset:1,260,000 14 | - Development pairs:140,000 15 | - Test pairs:60,000 16 | 17 | 18 | -------------------------------------------------------------------------------- /egs/yahoo_answer/text_cls/v1/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | start_stage=0 4 | stop_stage=100 5 | data=./data/ 6 | 7 | if [ ${start_stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then 8 | # download data 9 | [ -d $data ] || mkdir -p $data || exit 1 10 | wget -P $data https://s3.amazonaws.com/fast-ai-nlp/yahoo_answers_csv.tgz || exit 1 11 | tar zxvf $data/yahoo_answers_csv.tgz -C $data || exit 1 12 | fi 13 | 14 | if [ ${start_stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then 15 | # generate data with standard format 16 | python3 local/generate_standard_format.py $data/yahoo_answers_csv/train.csv $data/train_all.txt || exit 1 17 | python3 local/generate_standard_format.py $data/yahoo_answers_csv/test.csv $data/test.txt || exit 1 18 | fi 19 | 20 | if [ ${start_stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then 21 | # split dev and train 22 | python3 utils/nlp/split_train_dev.py $data/train_all.txt $data/train.txt $data/dev.txt 0.1 || exit 1 23 | fi 24 | 25 | if [ ${start_stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then 26 | # scale data 27 | python3 utils/nlp/scale.py $data/train.txt $data/train.small.txt 0.01 || exit 1 28 | python3 utils/nlp/scale.py $data/dev.txt $data/dev.small.txt 0.01 || exit 1 29 | python3 utils/nlp/scale.py $data/test.txt $data/test.small.txt 0.01 || exit 1 30 | fi 31 | -------------------------------------------------------------------------------- /egs/yahoo_answer/text_cls/v1/utils: -------------------------------------------------------------------------------- 1 |
../../../../utils -------------------------------------------------------------------------------- /gcompiler/.gitginore: -------------------------------------------------------------------------------- 1 | third_party/pybind11/ 2 | -------------------------------------------------------------------------------- /gcompiler/cmake/external/pybind11.cmake: -------------------------------------------------------------------------------- 1 | include(FetchContent) 2 | 3 | set(PYBIND11_DIR ${DELTA_INFER_ROOT}/third_party/pybind11) 4 | 5 | FetchContent_GetProperties(pybind11) 6 | if(NOT pybind11_POPULATED) 7 | FetchContent_Declare( 8 | pybind11 9 | GIT_REPOSITORY https://github.com/pybind/pybind11.git 10 | GIT_TAG master 11 | DOWNLOAD_DIR ${PYBIND11_DIR} 12 | SOURCE_DIR ${PYBIND11_DIR} 13 | ) 14 | delta_msg(INFO STR "pybind11: ${PYBIND11_DIR}") 15 | FetchContent_MakeAvailable(pybind11) 16 | endif() 17 | -------------------------------------------------------------------------------- /gcompiler/delta_infer/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | set(DELTA_INFER_SRC "") 3 | delta_fetch_include_recursively(${DELTA_INFER_DELTA_INFER}) 4 | 5 | add_subdirectory(${DELTA_INFER_DELTA_INFER}/custom_grappler/) 6 | 7 | if(BUILD_DELTA_INFER_CUSTOM_OPS) 8 | add_subdirectory(${DELTA_INFER_DELTA_INFER}/custom_ops/) 9 | endif() 10 | 11 | add_subdirectory(${DELTA_INFER_DELTA_INFER}/core/) 12 | 13 | add_subdirectory(${DELTA_INFER_DELTA_INFER}/scheduler/) 14 | 15 | #delta_msg(INFO STR "Get src: ${DELTA_INFER_SRC}") 16 | 17 | if(BUILD_SHARED) 18 | cc_library(delta_infer SHARED SRCS ${DELTA_INFER_SRC} DEPS pywrap_tf_internal LINK_LIBS ${DELTA_INFER_LINK_LIBS}) 19 | else() 20 | cc_library(delta_infer STATIC SRCS ${DELTA_INFER_SRC} DEPS pywrap_tf_internal LINK_LIBS ${DELTA_INFER_LINK_LIBS}) 21 | endif() 22 | 23 | # putting test after lib is compiled. 
24 | #add_subdirectory(${DELTA_INFER_DELTA_INFER}/test/) 25 | -------------------------------------------------------------------------------- /gcompiler/delta_infer/core/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | delta_fetch_files_with_suffix(${DELTA_INFER_DELTA_INFER}/core "cc" DELTA_INFER_SRC) 3 | 4 | set(DELTA_INFER_SRC ${DELTA_INFER_SRC} PARENT_SCOPE) 5 | -------------------------------------------------------------------------------- /gcompiler/delta_infer/core/config.h: -------------------------------------------------------------------------------- 1 | #ifndef _DELTA_INFER_CONFIG_H_ 2 | #define _DELTA_INFER_CONFIG_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace tensorflow { 13 | 14 | namespace delta { 15 | 16 | /// basic entry value of config 17 | class Entry { 18 | public: 19 | Entry() {} 20 | virtual ~Entry() {} 21 | }; 22 | 23 | class Config { 24 | public: 25 | static Config& Instance() { 26 | static Config config_ins; 27 | return config_ins; 28 | } 29 | 30 | bool have(const std::string& key) const { 31 | for (auto it = _algo_map.begin(); it != _algo_map.end(); ++it) { 32 | if (it->first == key) { 33 | return true; 34 | } 35 | } 36 | return false; 37 | } 38 | 39 | void add(const std::string& key, std::shared_ptr entry) { 40 | _algo_map[key] = entry; 41 | } 42 | 43 | std::shared_ptr operator[](const std::string& key) { 44 | if (this->have(key)) { 45 | return _algo_map[key]; 46 | } 47 | return nullptr; 48 | } 49 | 50 | private: 51 | Config() {} 52 | std::unordered_map< std::string, std::shared_ptr > _algo_map; 53 | }; 54 | 55 | } /* namespace delta */ 56 | 57 | } /* namespace tensorflow */ 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /gcompiler/delta_infer/custom_grappler/CMakeLists.txt: -------------------------------------------------------------------------------- 1 
| 2 | delta_fetch_files_with_suffix(${DELTA_INFER_DELTA_INFER}/custom_grappler "cc" DELTA_INFER_SRC) 3 | 4 | set(DELTA_INFER_SRC ${DELTA_INFER_SRC} PARENT_SCOPE) 5 | -------------------------------------------------------------------------------- /gcompiler/delta_infer/custom_grappler/auto_fusion.h: -------------------------------------------------------------------------------- 1 | #ifndef _DELTA_INFER_AUTO_FUSION_H_ 2 | #define _DELTA_INFER_AUTO_FUSION_H_ 3 | 4 | #include "delta_infer/core/scatter_search.h" 5 | #include "delta_infer/custom_grappler/local_optimizer.h" 6 | 7 | namespace tensorflow { 8 | namespace grappler { 9 | 10 | /// help function to regist grpah pattern for automatic fusion 11 | void RegisterFusionPattern(const std::string& name, const Pattern& graph, 12 | const std::string& hint_op); 13 | 14 | class AutoFusion : public LocalOptimizer { 15 | public: 16 | explicit AutoFusion(const string& name) : LocalOptimizer(name) {} 17 | 18 | virtual GraphDef* GenOptGraph(const GraphDef* original_graph) final; 19 | }; 20 | 21 | } /* namespace grappler */ 22 | } /* namespace tensorflow */ 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /gcompiler/delta_infer/custom_ops/platform/X86/transformer_functor.cc: -------------------------------------------------------------------------------- 1 | #include "delta_infer/custom_ops/transformer_cell_functor.h" 2 | 3 | namespace tensorflow { 4 | 5 | namespace x86 {} /* namespace x86 */ 6 | 7 | // using CPUDevice = Eigen::ThreadPoolDevice; 8 | 9 | template <> 10 | TransformerCellFunctor::TransformerCellFunctor() {} 11 | 12 | template <> 13 | TransformerCellFunctor::~TransformerCellFunctor() {} 14 | 15 | template <> 16 | void TransformerCellFunctor::init( 17 | TransformerParam& param) {} 18 | 19 | template <> 20 | void TransformerCellFunctor::operator()( 21 | OpKernelContext* context, TransformerParam& param) { 22 | printf("didi test in CPU \n"); 23 | exit(0); 24 | } 25 
| 26 | } /* namespace tensorflow */ 27 | -------------------------------------------------------------------------------- /gcompiler/delta_infer/custom_ops/platform/X86/transformer_nlp_functor.cc: -------------------------------------------------------------------------------- 1 | #include "delta_infer/custom_ops/transformer_cell_nlp_functor.h" 2 | 3 | namespace tensorflow { 4 | 5 | namespace x86 {} /* namespace x86 */ 6 | 7 | // using CPUDevice = Eigen::ThreadPoolDevice; 8 | 9 | template <> 10 | TransformerCellNLPFunctor::TransformerCellNLPFunctor() {} 11 | 12 | template <> 13 | TransformerCellNLPFunctor::~TransformerCellNLPFunctor() {} 14 | 15 | template <> 16 | void TransformerCellNLPFunctor::init( 17 | TransformerNLPParam& param) {} 18 | 19 | template <> 20 | void TransformerCellNLPFunctor::operator()( 21 | OpKernelContext* context, TransformerNLPParam& param) { 22 | printf("ccw test in CPU \n"); 23 | exit(0); 24 | } 25 | 26 | } /* namespace tensorflow */ 27 | -------------------------------------------------------------------------------- /gcompiler/delta_infer/scheduler/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/gcompiler/delta_infer/scheduler/CMakeLists.txt -------------------------------------------------------------------------------- /gcompiler/delta_infer/test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | delta_fetch_files_with_suffix(${DELTA_INFER_DELTA_INFER}/test "cc" DELTA_INFER_TESTS) 3 | 4 | foreach(SRC_NAME ${DELTA_INFER_TESTS}) 5 | #unpack the dir "/" 6 | string(REPLACE "/" ";" SEXY_LIST ${SRC_NAME}) 7 | list(GET SEXY_LIST -1 TEST_CASE_NAME) 8 | #get the file name without suffix 9 | string(REPLACE "." 
";" SEXY_LIST ${TEST_CASE_NAME}) 10 | list(GET SEXY_LIST 0 TEST_CASE_NAME) 11 | find_package(Threads) 12 | cc_binary(${TEST_CASE_NAME} SRCS ${SRC_NAME} DEPS delta_infer pywrap_tf_internal LINK_LIBS ${DELTA_INFER_LINK_LIBS} ${CMAKE_THREAD_LIBS_INIT}) 13 | endforeach() 14 | 15 | -------------------------------------------------------------------------------- /gcompiler/delta_infer/test/basic_test.cc: -------------------------------------------------------------------------------- 1 | #include "absl/strings/match.h" 2 | #include "absl/strings/substitute.h" 3 | #include "tensorflow/core/framework/graph.pb.h" 4 | #include "tensorflow/core/grappler/grappler_item.h" 5 | #include "tensorflow/core/grappler/optimizers/custom_graph_optimizer.h" 6 | #include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h" 7 | #include "tensorflow/core/grappler/utils.h" 8 | #include "tensorflow/core/lib/core/status.h" 9 | #include "tensorflow/core/lib/gtl/map_util.h" 10 | #include "tensorflow/core/platform/protobuf.h" 11 | #include "tensorflow/core/protobuf/config.pb.h" 12 | 13 | namespace tensorflow { 14 | namespace grappler { 15 | 16 | void test_for_optimizer() { 17 | const std::vector optimizers = 18 | CustomGraphOptimizerRegistry::GetRegisteredOptimizers(); 19 | if (std::count(optimizers.begin(), optimizers.end(), "TestOptimizer") != 1) { 20 | std::cout << "get test_optimizer error!\n"; 21 | } 22 | std::unique_ptr test_optimizer = 23 | CustomGraphOptimizerRegistry::CreateByNameOrNull("TestOptimizer"); 24 | 25 | if (!test_optimizer) { 26 | std::cout << "get test_optimizer error!\n"; 27 | exit(1); 28 | } 29 | std::cout << "Get registered optimizer: " << test_optimizer->name() 30 | << " suc!\n"; 31 | } 32 | 33 | } // namespace grappler 34 | } // namespace tensorflow 35 | 36 | int main(int argc, const char** argv) { 37 | tensorflow::grappler::test_for_optimizer(); 38 | return 1; 39 | } 40 | -------------------------------------------------------------------------------- 
/gcompiler/delta_infer/test/subgraphs/TansformerCell.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/gcompiler/delta_infer/test/subgraphs/TansformerCell.pb -------------------------------------------------------------------------------- /gcompiler/example/python/nlp_transformer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/gcompiler/example/python/nlp_transformer/__init__.py -------------------------------------------------------------------------------- /gcompiler/example/python/standard_transformer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/gcompiler/example/python/standard_transformer/__init__.py -------------------------------------------------------------------------------- /gcompiler/example/python/tts_transformer.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import delta_infer as dti 3 | from tts_transformer.model import * 4 | 5 | 6 | @dti.RegistPattern(name="TransformerCell") 7 | def TransformerCellType(src, src_mask, d_model, nhead, dim_feedforward=2048): 8 | output = TransformerEncoderLayer( 9 | d_model, nhead, dim_feedforward=2048, dropout=0.1, activation="gelu")( 10 | src, src_mask=src_mask, training=None) 11 | return output 12 | 13 | 14 | if __name__ == "__main__": 15 | tf.compat.v1.disable_eager_execution() 16 | # open graph optimizer stream 17 | with dti.GraphStream("./model2pb.pb") as gs: 18 | batch_size = 16 19 | seq_length = 1600 20 | hidden_size = 512 21 | nhead = 4 22 | 23 | #src_mask = tf.placeholder(tf.float32, shape=(batch_size, 1, seq_length, seq_length)) 24 | 25 | src = 
tf.compat.v1.placeholder( 26 | tf.float32, shape=(batch_size, seq_length, hidden_size)) 27 | 28 | out_trans = TransformerCellType( 29 | src=src, 30 | src_mask=None, 31 | d_model=hidden_size, 32 | nhead=nhead, 33 | dim_feedforward=1080) 34 | # remove in the future 35 | gs.register_hint_op("TransformerCell", "BatchMatMulV2") 36 | gs.save("./tts_result.pb") 37 | 38 | with tf.compat.v1.Session() as sess: 39 | graph_def = dti.RegistPattern.get_patterns("TransformerCell")[0] 40 | with open("TransformerCell_tts.pb", "wb") as f: 41 | f.write( 42 | tf.compat.v1.graph_util.remove_training_nodes( 43 | graph_def).SerializeToString()) 44 | sess.close() 45 | -------------------------------------------------------------------------------- /gcompiler/example/python/tts_transformer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/gcompiler/example/python/tts_transformer/__init__.py -------------------------------------------------------------------------------- /gcompiler/install/bench.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=1 ./bazel-bin/tensorflow/tools/benchmark/benchmark_model \ 4 | --graph="/nfs/project/models/frozen_graph.pb" \ 5 | --input_layer='inputs:0' \ 6 | --input_layer_shape="32,1000,40,1" \ 7 | --input_layer_type="float" \ 8 | --input_layer_values="" \ 9 | --output_layer='softmax_output:0' 10 | -------------------------------------------------------------------------------- /gcompiler/install/viewgraph.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | model=$1 4 | 5 | netron --port 8013 --host 10.186.1.228 $model 6 | -------------------------------------------------------------------------------- /gcompiler/python/.gitignore: 
-------------------------------------------------------------------------------- 1 | build/ 2 | *.o 3 | *.pyc 4 | __pycache__ 5 | *.egg-info 6 | dist/ 7 | *.pb 8 | -------------------------------------------------------------------------------- /gcompiler/python/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/gcompiler/python/README.md -------------------------------------------------------------------------------- /gcompiler/python/delta_infer/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import os 7 | 8 | from .subgraphs import * 9 | from .optimizer import GraphStream 10 | -------------------------------------------------------------------------------- /gcompiler/python/delta_infer/subgraphs/__init__.py: -------------------------------------------------------------------------------- 1 | from .transformer import * 2 | from .common import * 3 | 4 | #tf.compat.v1.disable_eager_execution() 5 | # 6 | #batch_size = 40 7 | #seq_length = 200 8 | #hidden_size = 768 9 | #num_attention_heads =12 10 | #size_per_head = int(hidden_size / num_attention_heads) 11 | # 12 | #layer_input = tf.compat.v1.placeholder(tf.float32, shape=(batch_size*seq_length, hidden_size)) 13 | ## Tensor of shape [batch_size, from_seq_length, to_seq_length]. 
14 | #attention_mask = tf.compat.v1.placeholder(tf.float32, shape=(batch_size, seq_length, seq_length)) 15 | # 16 | #output_rnn = transformer_cell(input_tensor=layer_input,#tf.reshape(layer_input, [-1, hidden_size]), 17 | # attention_mask=attention_mask, 18 | # hidden_size=hidden_size, 19 | # num_attention_heads=num_attention_heads, 20 | # attention_head_size=size_per_head, 21 | # batch_size = batch_size, 22 | # seq_length = seq_length, 23 | # intermediate_size=1280) 24 | -------------------------------------------------------------------------------- /gcompiler/python/delta_infer/subgraphs/common/__init__.py: -------------------------------------------------------------------------------- 1 | from .generator import * 2 | from .summarize_graph import * 3 | -------------------------------------------------------------------------------- /gcompiler/python/delta_infer/subgraphs/common/generator.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import os 4 | import sys 5 | import tensorflow as tf 6 | from tensorflow.python.framework import graph_util 7 | from tensorflow.compat.v1 import Session 8 | 9 | from .summarize_graph import GraphSummary 10 | 11 | __all__ = ["RegistPattern"] 12 | 13 | 14 | class RegistPattern(object): 15 | """ Rgist Pattern Decorator""" 16 | # a pattern map form name to list of GraphDef 17 | # note that a key name maybe map to multi graphdefs. 
18 | patterns = {} 19 | 20 | def __init__(self, name=None): 21 | self.name = name 22 | if name not in RegistPattern.patterns: 23 | RegistPattern.patterns[name] = [] 24 | 25 | @staticmethod 26 | def get_patterns(name): 27 | return RegistPattern.patterns[name] 28 | 29 | @staticmethod 30 | def Patterns(): 31 | return RegistPattern.patterns 32 | 33 | def __call__(self, func): 34 | 35 | def local(*args, **kwargs): 36 | with Session() as sess: 37 | ret = func(*args, **kwargs) 38 | #sess.run(tf.compat.v1.global_variables_initializer()) 39 | tf.compat.v1.global_variables_initializer().run() 40 | graph_summary = GraphSummary(graph_def=sess.graph_def) 41 | graph_summary.Summary() 42 | graph_def = graph_util.\ 43 | convert_variables_to_constants(sess, sess.graph_def, graph_summary["outputs"]) 44 | RegistPattern.patterns[self.name].append(graph_def) 45 | return ret 46 | 47 | return local 48 | -------------------------------------------------------------------------------- /gcompiler/python/delta_infer/subgraphs/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | from .transformer import * 2 | -------------------------------------------------------------------------------- /gcompiler/third_party/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Delta-ML/delta/31dfebc8f20b7cb282b62f291ff25a87e403cc86/gcompiler/third_party/.gitkeep -------------------------------------------------------------------------------- /tools/.gitignore: -------------------------------------------------------------------------------- 1 | tensorflow_pkg 2 | -------------------------------------------------------------------------------- /tools/install/build_pip_pkg.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PIP_NAME="delta-nlp" 4 | 5 | echo "Uninstall ${PIP_NAME} if exist ..." 
6 | pip3 uninstall -y ${PIP_NAME} 7 | 8 | echo "Build binary distribution wheel file ..." 9 | BASH_DIR=`dirname "$BASH_SOURCE"` 10 | pushd ${BASH_DIR}/../.. 11 | rm -rf build/ ${PIP_NAME}.egg-info/ dist/ 12 | python3 setup.py bdist_wheel 13 | popd 14 | -------------------------------------------------------------------------------- /tools/install/install-deltann.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BASH_DIR=`dirname "$BASH_SOURCE"` 4 | 5 | pushd ${BASH_DIR}/../.. 6 | source env.sh 7 | popd 8 | 9 | pushd ${MAIN_ROOT}/tools 10 | make deltann || echo "deltann install failed" && exit -1 11 | popd 12 | 13 | echo "deltann has been installed successfully ~" 14 | -------------------------------------------------------------------------------- /tools/install/install-go.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | #url="$(wget -qO- https://golang.org/dl/ | grep -oP 'https:\/\/dl\.google\.com\/go\/go([0-9\.]+)\.linux-amd64\.tar\.gz' | head -n 1 )" 4 | #latest="$(echo $url | grep -oP 'go[0-9\.]+' | grep -oP '[0-9\.]+' | head -c -2 )" 5 | #echo "Downloading latest Go for AMD64: ${latest}" 6 | latest="1.14.4" 7 | url="https://mirrors.ustc.edu.cn/golang/go${latest}.linux-amd64.tar.gz" 8 | wget --quiet --continue --show-progress "${url}" 9 | unset url 10 | 11 | sudo tar -C /usr/local -xzf go"${latest}".linux-amd64.tar.gz 12 | 13 | echo "Create the skeleton for your local users go directory" 14 | mkdir -p ~/go/{bin,pkg,src} 15 | 16 | echo "Setting up GOPATH" 17 | echo "export GOPATH=~/go" >> ../go.env 18 | 19 | echo "Setting PATH to include golang binaries" 20 | echo "export PATH='$PATH':/usr/local/go/bin:$GOPATH/bin" >> ../go.env 21 | echo "export GO111MODULE=on" >> ../go.env 22 | 23 | # Remove Download 24 | rm go"${latest}".linux-amd64.tar.gz 25 | 26 | # Print Go Version 27 | /usr/local/go/bin/go version 28 | 29 | pushd ../ 30 | source go.env 31 
| popd 32 | -------------------------------------------------------------------------------- /tools/install/install-ops.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BASH_DIR=`dirname "$BASH_SOURCE"` 4 | 5 | if [ -z $MAIN_ROOT ];then 6 | pushd ${BASH_DIR}/../../ 7 | source env.sh 8 | popd 9 | fi 10 | 11 | rm $MAIN_ROOT/tools/compile_ops.done || true 12 | 13 | pushd $MAIN_ROOT/tools && make compile_ops.done && popd 14 | -------------------------------------------------------------------------------- /tools/install/sph2pipe.patch: -------------------------------------------------------------------------------- 1 | diff --git a/sph2pipe.c b/sph2pipe.c 2 | index d0d502b..928739c 100644 3 | --- a/sph2pipe.c 4 | +++ b/sph2pipe.c 5 | @@ -103,6 +103,7 @@ 6 | 7 | #define _SPH_CONVERT_MAIN_ 8 | 9 | +#include 10 | #include "sph_convert.h" 11 | #include "ulaw.h" 12 | 13 | diff --git a/ulaw.h b/ulaw.h 14 | index 990c833..62ff96e 100644 15 | --- a/ulaw.h 16 | +++ b/ulaw.h 17 | @@ -82,3 +82,6 @@ GLOBAL short int alaw2pcm[256] 18 | } 19 | #endif 20 | ; 21 | + 22 | +uchar pcm2ulaw( short int sample ); 23 | +uchar pcm2alaw( short int pcmval ); 24 | -------------------------------------------------------------------------------- /tools/license/LICENSE_cpp: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | All rights reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | ==============================================================================*/ 16 | 17 | -------------------------------------------------------------------------------- /tools/license/LICENSE_py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | 17 | -------------------------------------------------------------------------------- /tools/license/add_license.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ -z $MAIN_ROOT ];then 4 | source env.sh 5 | fi 6 | 7 | on_exit() { 8 | unlink $MAIN_ROOT/tools/license/license.cpp.list 9 | unlink $MAIN_ROOT/tools/license/license.py.list 10 | } 11 | 12 | #cpp 13 | if ! [ -f $MAIN_ROOT/tools/license/license.cpp.list ];then 14 | for dir in delta dpl tools/test utils; 15 | do 16 | find $dir -name *.cc -or -name *.h >> $MAIN_ROOT/tools/license/license.cpp.list 17 | done 18 | trap on_exit EXIT 19 | fi 20 | 21 | while read file; 22 | do 23 | if ! 
grep -q Copyright $file 24 | then 25 | echo "process: $file" 26 | cat $MAIN_ROOT/tools/license/LICENSE_cpp $file >$file.new && mv $file.new $file 27 | fi 28 | done < $MAIN_ROOT/tools/license/license.cpp.list 29 | 30 | #python 31 | if ! [ -f $MAIN_ROOT/tools/license/license.py.list ];then 32 | for dir in delta dpl tools/test utils; 33 | do 34 | find $dir -name *.py >> $MAIN_ROOT/tools/license/license.py.list 35 | done 36 | trap on_exit EXIT 37 | fi 38 | 39 | while read file; 40 | do 41 | if ! grep -q Copyright $file 42 | then 43 | echo "process: $file" 44 | cat $MAIN_ROOT/tools/license/LICENSE_py $file >$file.new && mv $file.new $file 45 | fi 46 | done < $MAIN_ROOT/tools/license/license.py.list 47 | -------------------------------------------------------------------------------- /tools/plugins/vim/plugin/yapf.vim: -------------------------------------------------------------------------------- 1 | " Copyright 2015 Google Inc. All Rights Reserved. 2 | " 3 | " Licensed under the Apache License, Version 2.0 (the "License"); 4 | " you may not use this file except in compliance with the License. 5 | " You may obtain a copy of the License at 6 | " 7 | " http://www.apache.org/licenses/LICENSE-2.0 8 | " 9 | " Unless required by applicable law or agreed to in writing, software 10 | " distributed under the License is distributed on an "AS IS" BASIS, 11 | " WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | " See the License for the specific language governing permissions and 13 | " limitations under the License. 14 | """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" 15 | " VIM command for YAPF support 16 | " 17 | " Place this script in your ~/.vim/plugin directory. You can call the 18 | " command YAPF. If you omit the range, it will reformat the whole 19 | " buffer. 20 | " 21 | " example: 22 | " :YAPF " formats whole buffer 23 | " :'<,'>YAPF " formats lines selected in visual mode 24 | 25 | command! 
-range=% YAPF ,call yapf#YAPF() 26 | -------------------------------------------------------------------------------- /tools/requirements.txt: -------------------------------------------------------------------------------- 1 | # When update this file, 2 | # please using `pushd docker && bash run.sh && popd` 3 | # to generate dockerfiles for DELTA 4 | 5 | matplotlib 6 | sklearn 7 | pandas 8 | librosa>=0.7.2 9 | numba==0.48.0 #must be this by librosa>=0.7.2 10 | absl-py 11 | jieba 12 | wget 13 | kaldiio 14 | soundfile 15 | textgrid 16 | pyyaml 17 | yapf 18 | gensim 19 | hurry.filesize 20 | imbalanced-learn 21 | sphinx 22 | sphinx_rtd_theme 23 | recommonmark 24 | pylint 25 | cpplint 26 | seqeval 27 | rouge 28 | sacrebleu 29 | pyAudioAnalysis 30 | flake8 31 | sentencepiece 32 | deepdiff 33 | tensorflow-addons==0.11.1 34 | tensorflow-model-optimization 35 | 36 | 37 | # TODO: rm below 38 | # https://github.com/tensorflow/addons/issues/864 39 | tensorflow-cpu==2.3.0 40 | -------------------------------------------------------------------------------- /tools/test/.gitignore: -------------------------------------------------------------------------------- 1 | gen 2 | *.bin 3 | -------------------------------------------------------------------------------- /tools/test/cpp_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 3 | # All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # ============================================================================== 17 | 18 | set -e 19 | 20 | curdir="$(dirname "${BASH_SOURCE[0]}")" 21 | 22 | if [ -z $MAIN_ROOT ];then 23 | if [ $curdir == "." ];then 24 | source ../../env.sh 25 | else 26 | source env.sh 27 | fi 28 | fi 29 | 30 | if [ ! -d $MAIN_ROOT/deltann/.gen ];then 31 | pushd $MAIN_ROOT/deltann && ./build.sh linux x86_64 TF && popd 32 | fi 33 | 34 | pushd $MAIN_ROOT/tools/test && make clean && make && popd 35 | 36 | set -x 37 | 38 | #echo $(python -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())') 39 | #LD_LIBRARY_PATH=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())') $MAIN_ROOT/tools/test/test_main.bin 40 | 41 | LD_LIBRARY_PATH=$MAIN_ROOT/tools/tensorflow/bazel-bin/tensorflow $MAIN_ROOT/tools/test/test_main.bin 42 | -------------------------------------------------------------------------------- /tools/test/gen_mock_egs_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 3 | # All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # ============================================================================== 17 | 18 | 19 | if [ -z $MAIN_ROOT ];then 20 | source env.sh 21 | fi 22 | 23 | tmpfile=`mktemp /tmp/mock_run.XXXXXX` 24 | 25 | find $MAIN_ROOT/egs/mock_* -name run.sh &> $tmpfile 26 | 27 | retcode=0 28 | while read line 29 | do 30 | echo "Run $line" 31 | rundir=`dirname "$line"` 32 | cd $rundir 33 | bash run.sh 34 | if [ $? != 0 ];then 35 | echo "Run $line error." > /dev/stderr 36 | retcode=1 37 | fi 38 | done < $tmpfile 39 | 40 | cd $MAIN_ROOT 41 | exit $retcode 42 | -------------------------------------------------------------------------------- /tools/test/integration_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 3 | # All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # ============================================================================== 17 | 18 | 19 | if [ -z ${MAIN_ROOT} ];then 20 | if [ -f env.sh ];then 21 | source env.sh 22 | else 23 | source ../../env.sh 24 | fi 25 | fi 26 | 27 | # Set bash to 'debug' mode, it will exit on : 28 | # -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands', 29 | set -e 30 | set -u 31 | set -o pipefail 32 | 33 | #prepare kaldi 34 | if [ ! 
-d ${MAIN_ROOT}/tools/kaldi/tools/sph2pipe_v2.5 ]; then 35 | bash ${MAIN_ROOT}/tools/install/prepare_kaldi.sh 36 | fi 37 | 38 | echo "Integration Testing..." 39 | 40 | pushd ${MAIN_ROOT}/egs/mini_an4/asr/v1 41 | bash run_delta.sh || { echo "mini an4 error"; exit 1; } 42 | popd 43 | 44 | echo "Integration Testing Done." 45 | -------------------------------------------------------------------------------- /tools/test/python_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 3 | # All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # ============================================================================== 17 | 18 | set -e 19 | 20 | if [ -z $MAIN_ROOT ];then 21 | if [ -f env.sh ];then 22 | source env.sh 23 | else 24 | source ../../env.sh 25 | fi 26 | fi 27 | 28 | # generate mock data 29 | bash $MAIN_ROOT/tools/test/gen_mock_egs_data.sh 30 | 31 | # python unist test 32 | tmpfile=`mktemp /tmp/python_test.XXXXXX` 33 | 34 | find $MAIN_ROOT/delta -name *_test.py &> $tmpfile 35 | 36 | retcode=0 37 | while read line 38 | do 39 | echo Testing $line 40 | python3 $line 41 | if [ $? != 0 ];then 42 | echo Test $line error. 
> /dev/stderr 43 | retcode=1 44 | fi 45 | done < $tmpfile 46 | 47 | if [ $retcode -ne 0 ]; then 48 | exit $retcode 49 | fi 50 | 51 | # integration test 52 | bash $MAIN_ROOT/tools/test/integration_test.sh 53 | 54 | -------------------------------------------------------------------------------- /tools/test/test_main.cc: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | All rights reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ==============================================================================*/ 16 | 17 | #include 18 | 19 | #include 20 | 21 | // https://github.com/google/googletest/blob/master/googletest/docs/primer.md 22 | 23 | GTEST_API_ int main(int argc, char** argv) { 24 | std::cout << "Runing main() from test_main.cc\n"; 25 | 26 | ::testing::InitGoogleTest(&argc, argv); 27 | 28 | return RUN_ALL_TESTS(); 29 | } 30 | -------------------------------------------------------------------------------- /tools/valgrind.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ $# != 2 ];then 4 | echo "usage: $0 [memcheck|massif] [program_bin]" 5 | exit 1 6 | fi 7 | 8 | set -e 9 | 10 | TOOL=$1 11 | BIN=$2 12 | 13 | if [ $(valgrind --version > /dev/null) != 0 ];then 14 | apt-get install valgrind 15 | fi 16 | 17 | if [ $TOOL == "memcheck" ];then 18 | valgrind --leak-check=full --log-file=memcheck.log --show-leak-kinds=all $BIN 19 | elif [ $TOOL == "massif" ];then 20 | valgrind --tool=massif --log-file=massif.log $BIN 21 | ms_print massif.log > massif.txt 22 | fi 23 | -------------------------------------------------------------------------------- /utils/deploy/benchmark_model.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ROOT=${MAIN_ROOT}/tools/tensorflow 4 | 5 | ${ROOT}/bazel-bin/tensorflow/tools/benchmark/benchmark_model \ 6 | --graph=frozen_graph.pb --show_flops --input_layer=inputs --input_layer_type=float --input_layer_shape=10,3000,40,3 --output_layer=softmax_output 7 | -------------------------------------------------------------------------------- /utils/deploy/convert_frozen_pb_to_tftrt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python3 convert_frozen_pb_to_tftrt.py \ 4 | --input_graph ../model/emotion/frozen_graph_tf.pb \ 5 | --out_tensor frozen_softmax \ 6 | --precision_mode FP32 \ 7 | 
--batch_size 1024 \ 8 | --gpu 2 9 | 10 | -------------------------------------------------------------------------------- /utils/deploy/dot.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #dot -Tpng > foo.png 4 | dot -Tpdf > foo.pdf 5 | -------------------------------------------------------------------------------- /utils/deploy/print_selective_registration_header.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | graph=$1 4 | 5 | ROOT=${MAIN_ROOT}/tools/tensorflow 6 | 7 | ${ROOT}/bazel-bin/tensorflow/python/tools/print_selective_registration_header \ 8 | --graphs=$graph > ops_to_register.h 9 | -------------------------------------------------------------------------------- /utils/deploy/saved_model_cli.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # saved model dir 3 | set -x 4 | 5 | if [[ $# != 1 ]]; then 6 | echo "usage: $0 saved_model_dir" 7 | exit 1 8 | fi 9 | 10 | saved_model_dir=$1 11 | 12 | ROOT=${MAIN_ROOT}/tools/tensorflow 13 | ${ROOT}/bazel-bin/tensorflow/python/tools/saved_model_cli show --dir=$saved_model_dir --all 14 | -------------------------------------------------------------------------------- /utils/deploy/summarize_graph.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | graph=$1 4 | 5 | ROOT=${MAIN_ROOT}/tools/tensorflow 6 | 7 | ${ROOT}/bazel-bin/tensorflow/tools/graph_transforms/summarize_graph --in_graph=$graph --print_structure=false 8 | -------------------------------------------------------------------------------- /utils/deploy/tfconvert_frozen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # convert frozen graph to tflite model 3 | 4 | tflite_convert \ 5 | --output_file=foo.tflite \ 6 | --graph_def_file=frozen_graph_tflite.pb \ 7 | 
--input_arrays=feat \ 8 | --output_arrays=softmax_output 9 | -------------------------------------------------------------------------------- /utils/deploy/tflite_convert.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | 17 | import delta.compat as tf 18 | import sys 19 | import os 20 | ''' 21 | python3 tflite_convert.py saved_model_dir 22 | ''' 23 | 24 | os.environ['CUDA_VISIBLE_DEVICES'] = '' 25 | 26 | converter = tf.contrib.lite.TFLiteConverter.from_saved_model( 27 | saved_model_dir=sys.argv[1], input_shapes={'inputs': [5, 2000, 40, 3]}) 28 | converter.dump_graphviz_dir = './lite_dump' 29 | ''' 30 | converter.inference_type=tf.contrib.lite.constants.QUANTIZED_UINT8 31 | converter.quantized_input_stats={'inputs': (127, 1.0/128)} 32 | converter.default_ranges_stats=(0, 6) 33 | ''' 34 | 35 | tflite_model = converter.convert() 36 | with open('model.tflite', 'wb') as f: 37 | f.write(tflite_model) 38 | -------------------------------------------------------------------------------- /utils/deploy/tflite_convert_float_noquant.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ $# != 1 ]]; then 
4 | echo "usage: $0 saved_model_path" 5 | exit 1 6 | fi 7 | 8 | tflite_convert --output_file=graph_float_noquant.tflite \ 9 | --saved_model_dir=$1 \ 10 | --output_format TFLITE \ 11 | --inference_type FLOAT \ 12 | --inference_input_type FLOAT \ 13 | --dump_graphviz_dir graph_dump_float_noquant 14 | -------------------------------------------------------------------------------- /utils/deploy/tflite_convert_float_quant.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ $# != 1 ]]; then 4 | echo "usage: $0 saved_model_path" 5 | exit 1 6 | fi 7 | 8 | tflite_convert --output_file=graph_float_quant.tflite \ 9 | --saved_model_dir=$1 \ 10 | --output_format TFLITE \ 11 | --inference_type FLOAT \ 12 | --inference_input_type FLOAT \ 13 | --post_training_quantize \ 14 | --dump_graphviz_dir graph_dump_float_quant 15 | -------------------------------------------------------------------------------- /utils/deploy/tflite_convert_int8_noquant.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | if [[ $# != 1 ]]; then 6 | echo "usage: $0 saved_model_path" 7 | exit 1 8 | fi 9 | 10 | mkdir -p graph_dump_int8_noquant 11 | 12 | CUDA_VISIBLE_DEVICES= tflite_convert --output_file=graph_int8_noquant.tflite \ 13 | --saved_model_dir=$1 \ 14 | --output_format TFLITE \ 15 | --inference_type QUANTIZED_UINT8 \ 16 | --inference_input_type QUANTIZED_UINT8 \ 17 | --input_shapes=-1, 3000, 40, 3 \ 18 | --mean_values='127.0' \ 19 | --std_dev_values='0.0078125' \ 20 | --default_ranges_max=6 \ 21 | --default_ranges_min=0 \ 22 | --dump_graphviz_dir graph_dump_int8_noquant 23 | -------------------------------------------------------------------------------- /utils/deploy/tflite_convert_int8_quant.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ $# != 1 ]]; then 4 | echo "usage: $0 saved_model_path" 5 | exit 
1 6 | fi 7 | 8 | tflite_convert --output_file=graph_int8_quant.tflite \ 9 | --saved_model_dir=$1 \ 10 | --output_format TFLITE \ 11 | --inference_type QUANTIZED_UINT8 \ 12 | --inference_input_type QUANTIZED_UINT8 \ 13 | --std_dev_values=1.0 \ 14 | --mean_values=0 \ 15 | --default_ranges_max=6 \ 16 | --default_ranges_min=0 \ 17 | --post_training_quantize \ 18 | --dump_graphviz_dir graph_dump_int8_quant 19 | -------------------------------------------------------------------------------- /utils/deploy/tflite_run.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. 2 | # All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ============================================================================== 16 | 17 | import delta.compat as tf 18 | import sys 19 | import os 20 | 21 | os.environ['CUDA_VISIBLE_DEVICES'] = '' 22 | 23 | interpreter = tf.contrib.lite.Interpreter(model_path='./model.tflite') 24 | input_details = interpreter.get_input_details() 25 | output_details = interpreter.get_output_details() 26 | print('Inputs', input_details) 27 | print('Outputs', output_details) 28 | 29 | #interpreter.resize_tensor_input(input_details[0]['index'], [2, 2000, 40, 3]) 30 | 31 | input_details = interpreter.get_input_details() 32 | print('Inputs', input_details) 33 | interpreter.allocate_tensors() 34 | interpreter.invoke() 35 | -------------------------------------------------------------------------------- /utils/deploy/transformer_graph.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | graph=$1 3 | 4 | # strip_unused_nodes shape for input placeholder 5 | ROOT=${MAIN_ROOT}/tools/tensorflow 6 | 7 | ${ROOT}/bazel-bin/tensorflow/tools/graph_transforms/transform_graph \ 8 | --in_graph=$graph \ 9 | --out_graph=transformed_graph.pb \ 10 | --inputs='input:0' \ 11 | --outputs='softmax_output:0' \ 12 | --transforms=' 13 | add_default_attributes 14 | strip_unused_nodes(type=float, shape="-1,480000") 15 | remove_nodes(op=CheckNumerics) 16 | fold_batch_norms 17 | fold_old_batch_norms 18 | quantize_weights 19 | quantize_nodes 20 | strip_unused_nodes 21 | sort_by_execution_order 22 | ' 23 | #fold_constants(clear_output_shapes=false, ignore_errors=true) # error 24 | #obfuscate_names # ok 25 | -------------------------------------------------------------------------------- /utils/espnet_utils: -------------------------------------------------------------------------------- 1 | ../tools/espnet/utils -------------------------------------------------------------------------------- /utils/frozen_saved_model.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | . ../env.sh 4 | 5 | if [[ $# != 2 ]];then 6 | echo "usage: $0 saved_model_dir output_nodes" 7 | echo " Make sure delta/env.sh is sourced" 8 | exit 1 9 | fi 10 | 11 | # saved model dir 12 | saved_model_dir=$1 13 | output_nodes=$2 14 | 15 | ROOT=${MAIN_ROOT}/tools/tensorflow 16 | python3 ${ROOT}/tensorflow/python/tools/freeze_graph.py \ 17 | --input_saved_model_dir=$saved_model_dir\ 18 | --saved_model_tags='serve' \ 19 | --output_node_names=$output_nodes\ 20 | --output_graph='frozen_graph.pb' 21 | -------------------------------------------------------------------------------- /utils/inspect_saved_model.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # inspect saved model 3 | 4 | # https://www.tensorflow.org/beta/guide/saved_model#saved_model_cli 5 | # https://github.com/tensorflow/serving/blob/master/tensorflow_serving/g3doc/tutorials/Serving_REST_simple.ipynb 6 | 7 | set -e 8 | 9 | if [ $# != 1 ];then 10 | echo "usage: $0 saved_model_dir" 11 | exit 1 12 | fi 13 | 14 | saved_model_dir=$1 15 | 16 | echo "inspect model: [$PWD/$saved_model_dir]" 17 | 18 | echo "show" 19 | saved_model_cli show --dir $saved_model_dir 20 | echo 21 | echo "show serve" 22 | saved_model_cli show --dir $saved_model_dir --tag_set serve 23 | echo 24 | echo "show serve serving_default" 25 | saved_model_cli show --dir $saved_model_dir --tag_set serve --signature_def serving_default 26 | echo 27 | echo "show all" 28 | saved_model_cli show --dir $saved_model_dir --all 29 | echo 30 | -------------------------------------------------------------------------------- /utils/speech/ctc_token_fst.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Apache 2.0 4 | 5 | import sys 6 | 7 | fread = open(sys.argv[1], 'r') 8 | 9 | print('0 1 ') 10 | print('1 1 ') 11 | print('2 2 ') 12 
| print('2 0 ') 13 | 14 | nodeX = 3 15 | for entry in fread.readlines(): 16 | entry = entry.replace('\n', '').strip() 17 | fields = entry.split(' ') 18 | phone = fields[0] 19 | if phone == '' or phone == '': 20 | continue 21 | 22 | if '#' in phone: 23 | print(str(0) + ' ' + str(0) + ' ' + '' + ' ' + phone) 24 | else: 25 | print(str(1) + ' ' + str(nodeX) + ' ' + phone + ' ' + phone) 26 | print(str(nodeX) + ' ' + str(nodeX) + ' ' + phone + ' ') 27 | print(str(nodeX) + ' ' + str(2) + ' ' + ' ') 28 | nodeX += 1 29 | print('0') 30 | 31 | fread.close() 32 | --------------------------------------------------------------------------------