├── ADE ├── ade │ ├── __init__.py │ ├── utils │ │ ├── __init__.py │ │ ├── input_field.py │ │ ├── model_check.py │ │ └── save_load_io.py │ ├── evaluate.py │ ├── prepare_data_and_model.py │ └── reader.py ├── data │ ├── input │ │ └── input.md │ ├── output │ │ └── output.md │ ├── inference_models │ │ └── inference_models.md │ ├── saved_models │ │ └── saved_models.md │ ├── pretrain_model │ │ └── pretrain_model.md │ └── config │ │ └── ade.yaml ├── main.py ├── .run_ce.sh ├── _ce.py ├── eval.py ├── inference_model.py ├── ade_net.py ├── predict.py ├── run.sh └── train.py ├── DAM ├── bin │ ├── __init__.py │ ├── test_and_evaluate.py │ └── train_and_evaluate.py ├── models │ ├── __init__.py │ ├── self_match_net.py │ ├── last_net.py │ ├── cross_match_net.py │ └── net.py ├── utils │ ├── __init__.py │ ├── evaluation.py │ ├── douban_evaluation.py │ └── reader.py ├── log │ └── ReadMe.txt ├── output │ └── ReadMe.txt ├── run.sh ├── appendix │ ├── Figure1.png │ └── Figure2.png ├── data │ └── ReadMe.txt ├── main.py └── README.md ├── DGU ├── dgu │ ├── __init__.py │ ├── utils │ │ ├── __init__.py │ │ ├── py23.py │ │ ├── input_field.py │ │ ├── model_check.py │ │ ├── fp16.py │ │ └── save_load_io.py │ ├── scripts │ │ ├── README.md │ │ ├── conf │ │ │ └── mrda.conf │ │ ├── run_build_data.py │ │ ├── commonlib.py │ │ ├── build_mrda_dataset.py │ │ ├── build_dstc2_dataset.py │ │ ├── build_atis_dataset.py │ │ └── build_swda_dataset.py │ ├── prepare_data_and_model.py │ ├── define_predict_pack.py │ ├── optimization.py │ ├── define_paradigm.py │ ├── batching.py │ └── bert.py ├── data │ ├── input │ │ └── input.md │ ├── output │ │ └── output.md │ ├── inference_models │ │ └── inference_models.md │ ├── saved_models │ │ └── saved_models.md │ ├── pretrain_model │ │ └── pretrain_model.md │ └── config │ │ └── dgu.yaml ├── images │ └── dgu.png ├── eval.py ├── main.py ├── dgu_net.py ├── .run_ce.sh ├── _ce.py ├── inference_model.py ├── run.sh └── predict.py ├── .DS_Store ├── other └── ld.jpg └── README.md 
/ADE/ade/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /DAM/bin/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /DAM/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /DAM/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /DGU/dgu/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ADE/ade/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /DGU/dgu/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /DAM/log/ReadMe.txt: -------------------------------------------------------------------------------- 1 | store logs here. -------------------------------------------------------------------------------- /DAM/output/ReadMe.txt: -------------------------------------------------------------------------------- 1 | store models here. 
-------------------------------------------------------------------------------- /ADE/data/input/input.md: -------------------------------------------------------------------------------- 1 | training data directory 2 | -------------------------------------------------------------------------------- /DGU/data/input/input.md: -------------------------------------------------------------------------------- 1 | input train and test data directory 2 | -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/baidu/Dialogue/HEAD/.DS_Store -------------------------------------------------------------------------------- /ADE/data/output/output.md: -------------------------------------------------------------------------------- 1 | save predict results output directory 2 | -------------------------------------------------------------------------------- /DGU/data/output/output.md: -------------------------------------------------------------------------------- 1 | save predict results output directory 2 | -------------------------------------------------------------------------------- /other/ld.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/baidu/Dialogue/HEAD/other/ld.jpg -------------------------------------------------------------------------------- /ADE/data/inference_models/inference_models.md: -------------------------------------------------------------------------------- 1 | save inference model directory 2 | -------------------------------------------------------------------------------- /DAM/run.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | CUDA_VISIBLE_DEVICES=0 python main.py 3 | 4 | 5 | -------------------------------------------------------------------------------- /DGU/data/inference_models/inference_models.md: -------------------------------------------------------------------------------- 1 | save inference model directory 2 | -------------------------------------------------------------------------------- /DGU/images/dgu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/baidu/Dialogue/HEAD/DGU/images/dgu.png -------------------------------------------------------------------------------- /DAM/appendix/Figure1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/baidu/Dialogue/HEAD/DAM/appendix/Figure1.png -------------------------------------------------------------------------------- /DAM/appendix/Figure2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/baidu/Dialogue/HEAD/DAM/appendix/Figure2.png -------------------------------------------------------------------------------- /ADE/data/saved_models/saved_models.md: -------------------------------------------------------------------------------- 1 | save user finetuning models and trained model we provided 2 | -------------------------------------------------------------------------------- /DGU/data/saved_models/saved_models.md: -------------------------------------------------------------------------------- 1 | save user finetuning models and trained model we provided 2 | -------------------------------------------------------------------------------- /ADE/data/pretrain_model/pretrain_model.md: -------------------------------------------------------------------------------- 1 | pretrain model directory: model for network initialization 2 | -------------------------------------------------------------------------------- 
/DGU/data/pretrain_model/pretrain_model.md: -------------------------------------------------------------------------------- 1 | pretrain model directory: in this module, we use bert as pretrain model 2 | -------------------------------------------------------------------------------- /DAM/data/ReadMe.txt: -------------------------------------------------------------------------------- 1 | please download data from: 2 | 3 | https://pan.baidu.com/s/1hakfuuwdS8xl7NyxlWzRiQ 4 | 5 | and unzip it in this folder. 6 | -------------------------------------------------------------------------------- /ADE/data/config/ade.yaml: -------------------------------------------------------------------------------- 1 | loss_type: "CLS" 2 | training_file: "" 3 | val_file: "" 4 | predict_file: "" 5 | print_steps: 10 6 | save_steps: 10 7 | num_scan_data: "" 8 | word_emb_init: "" 9 | init_model: "" 10 | use_cuda: True 11 | batch_size: 256 12 | hidden_size: 256 13 | emb_size: 256 14 | vocab_size: 484016 15 | sample_pro: 1.0 16 | output_prediction_file: "" 17 | init_from_checkpoint: "" 18 | init_from_params: "" 19 | init_from_pretrain_model: "" 20 | inference_model_dir: "" 21 | save_model_path: "" 22 | save_checkpoint: "" 23 | save_param: "" 24 | evaluation_file: "" 25 | vocab_path: "" 26 | max_seq_len: 128 27 | random_seed: 110 28 | do_save_inference_model: False 29 | enable_ce: "" 30 | -------------------------------------------------------------------------------- /DGU/data/config/dgu.yaml: -------------------------------------------------------------------------------- 1 | task_name: "" 2 | data_dir: "" 3 | bert_config_path: "" 4 | init_from_checkpoint: "" 5 | init_from_params: "" 6 | init_from_pretrain_model: "" 7 | inference_model_dir: "" 8 | save_model_path: "" 9 | save_checkpoint: "" 10 | save_param: "" 11 | lr_scheduler: "linear_warmup_decay" 12 | weight_decay: 0.01 13 | warmup_proportion: 0.1 14 | save_steps: 1000 15 | use_fp16: False 16 | loss_scaling: 1.0 17 | print_steps: 20 
18 | evaluation_file: "" 19 | output_prediction_file: "" 20 | vocab_path: "" 21 | max_seq_len: 128 22 | batch_size: 2 23 | verbose: False 24 | do_lower_case: False 25 | random_seed: 0 26 | use_cuda: True 27 | in_tokens: False 28 | do_save_inference_model: False 29 | enable_ce: "" 30 | -------------------------------------------------------------------------------- /DGU/dgu/scripts/README.md: -------------------------------------------------------------------------------- 1 | scripts:运行数据处理脚本目录, 将官方公开数据集转换成模型所需训练数据格式 2 | 运行命令: 3 | python run_build_data.py [udc|swda|mrda|atis|dstc2] 4 | 5 | 1)、生成MATCHING任务所需要的训练集、开发集、测试集时: 6 | python run_build_data.py udc 7 | 生成数据在dialogue_general_understanding/data/input/data/udc 8 | 9 | 2)、生成DA任务所需要的训练集、开发集、测试集时: 10 | python run_build_data.py swda 11 | python run_build_data.py mrda 12 | 生成数据分别在dialogue_general_understanding/data/input/data/swda和dialogue_general_understanding/data/input/data/mrda 13 | 14 | 3)、生成DST任务所需的训练集、开发集、测试集时: 15 | python run_build_data.py dstc2 16 | 生成数据分别在dialogue_general_understanding/data/input/data/dstc2 17 | 18 | 4)、生成意图解析, 槽位识别任务所需训练集、开发集、测试集时: 19 | python run_build_data.py atis 20 | 生成槽位识别数据在dialogue_general_understanding/data/input/data/atis/atis_slot 21 | 生成意图识别数据在dialogue_general_understanding/data/input/data/atis/atis_intent 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /DGU/dgu/utils/py23.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
def get_p_at_n_in_m(data, n, m, ind):
    """Return 1 if the entry at ``ind`` ranks within the top ``n`` of its window.

    The window is ``data[ind:ind + m]``.  Entries are ``(score, label)``
    pairs; only the score is looked at here.  The score at ``ind`` is the
    reference score, and a tie with the n-th best score counts as a hit.

    Args:
        data: sequence of ``(score, label)`` pairs.
        n: rank cut-off (1-based).
        m: window size.
        ind: index of the first entry of the window.

    Returns:
        1 on a hit, 0 otherwise.

    Raises:
        IndexError: if the window holds fewer than ``n`` entries.
    """
    pos_score = data[ind][0]
    window = sorted(data[ind:ind + m], key=lambda pair: pair[0], reverse=True)

    # The reference entry is inside the top n exactly when the n-th best
    # score in the window does not exceed its own score.
    return 1 if window[n - 1][0] <= pos_score else 0


def evaluate(file_path):
    """Compute precision-at-n-in-m metrics from a tab-separated score file.

    Each usable line is ``<score>\\t<label>``; lines that do not split into
    exactly two tab-separated fields are skipped.  Rows are consumed in
    consecutive groups of 10 and the first row of every group must carry
    label 1.  Trailing rows that do not fill a whole group are ignored.

    Args:
        file_path: path of the score file.

    Returns:
        Tuple ``(p@1 in 2, p@1 in 10, p@2 in 10, p@5 in 10)``, each averaged
        over the number of complete groups.

    Raises:
        ValueError: if the file holds no complete group of 10 rows
            (previously this surfaced as a ZeroDivisionError).
        AssertionError: if the first row of a group is not labelled 1.
    """
    group_size = 10

    data = []
    with open(file_path, 'r') as score_file:
        for line in score_file:
            tokens = line.strip().split("\t")
            if len(tokens) != 2:
                continue
            data.append((float(tokens[0]), int(tokens[1])))

    length = len(data) // group_size
    if length == 0:
        raise ValueError(
            "%s holds %d usable rows; at least %d are needed"
            % (file_path, len(data), group_size))

    p_at_1_in_2 = 0.0
    p_at_1_in_10 = 0.0
    p_at_2_in_10 = 0.0
    p_at_5_in_10 = 0.0

    # ``range`` instead of ``xrange`` so the module runs on Python 2 and 3.
    for i in range(length):
        ind = i * group_size
        # Raised explicitly so the check survives ``python -O``.
        if data[ind][1] != 1:
            raise AssertionError(
                "row %d should start a group with label 1, got %r"
                % (ind, data[ind][1]))

        p_at_1_in_2 += get_p_at_n_in_m(data, 1, 2, ind)
        p_at_1_in_10 += get_p_at_n_in_m(data, 1, 10, ind)
        p_at_2_in_10 += get_p_at_n_in_m(data, 2, 10, ind)
        p_at_5_in_10 += get_p_at_n_in_m(data, 5, 10, ind)

    return (p_at_1_in_2 / length, p_at_1_in_10 / length,
            p_at_2_in_10 / length, p_at_5_in_10 / length)
class InputField(object):
    """Attach names to the positional entries of an ADE model input."""

    def __init__(self, input_field):
        """Store items 0, 1 and 2 of ``input_field`` as named attributes.

        Args:
            input_field: indexable container; item 0 becomes
                ``context_wordseq``, item 1 ``response_wordseq`` and
                item 2 ``labels``.
        """
        context, response, labels = (
            input_field[0], input_field[1], input_field[2])

        self.context_wordseq = context
        self.response_wordseq = response
        self.labels = labels
def do_eval(args):
    """Run the task evaluation described by ``args``.

    Reads ``task_name`` (lower-cased), ``evaluation_file`` and
    ``output_prediction_file`` from ``args`` and forwards them to
    ``evaluate`` as ``(task, predictions, reference)``.
    """
    task = args.task_name.lower()
    reference_path, prediction_path = (
        args.evaluation_file, args.output_prediction_file)

    evaluate(task, prediction_path, reference_path)


if __name__ == "__main__":
    # Stand-alone entry: load the DGU YAML config, then evaluate.
    args = PDConfig(yaml_file="./data/config/dgu.yaml")
    args.build()

    do_eval(args)
48 | train Bsr001 49 | train Btr001 50 | train Btr002 51 | train Buw001 52 | dev Bed003 53 | dev Bed010 54 | dev Bmr005 55 | dev Bmr014 56 | dev Bmr019 57 | dev Bmr024 58 | dev Bmr030 59 | dev Bro004 60 | dev Bro011 61 | dev Bro018 62 | dev Bro024 63 | test Bed006 64 | test Bed012 65 | test Bed016 66 | test Bmr001 67 | test Bmr010 68 | test Bmr022 69 | test Bmr028 70 | test Bro008 71 | test Bro014 72 | test Bro021 73 | test Bro027 74 | -------------------------------------------------------------------------------- /DGU/dgu/utils/input_field.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
class InputField(object):
    """Attach names to the positional entries of a DGU model input."""

    def __init__(self, input_field):
        """Store items 0 through 4 of ``input_field`` as named attributes.

        Args:
            input_field: indexable container; items 0-4 become, in order,
                ``src_ids``, ``pos_ids``, ``sent_ids``, ``input_mask`` and
                ``labels``.
        """
        src, pos, sent, mask, labels = (
            input_field[0], input_field[1], input_field[2],
            input_field[3], input_field[4])

        self.src_ids = src
        self.pos_ids = pos
        self.sent_ids = sent
        self.input_mask = mask
        self.labels = labels
if __name__ == "__main__":

    # Load the DGU configuration and echo it before doing any work.
    args = PDConfig(yaml_file="./data/config/dgu.yaml")
    args.build()
    args.Print()

    # Every stage is optional; enabled stages run in this fixed order and
    # each flag is read only when its turn comes.
    for flag_name, run_stage in (
            ("do_train", do_train),
            ("do_predict", do_predict),
            ("do_eval", do_eval),
            ("do_save_inference_model", do_save_inference_model)):
        if getattr(args, flag_name):
            run_stage(args)

# vim: set ts=4 sw=4 sts=4 tw=100:
# NOTE(review): the default message continues across a backslash-newline
# inside the string literal, so the indentation before "Please" is part of the
# message -- confirm the number of spaces against the upstream file.
def check_cuda(use_cuda, err = \
    "\nYou can not set use_cuda = True in the model because you are using paddlepaddle-cpu.\n \
    Please: 1. Install paddlepaddle-gpu to run your models on GPU or 2. Set use_cuda = False to run models on CPU.\n"
    ):
    """Exit with status 1 when CUDA is requested but Paddle was built without it.

    Args:
        use_cuda: the check only runs when this compares equal to ``True``.
        err: message printed before exiting.

    Any ``Exception`` raised while probing is swallowed, so the check is
    best effort.  ``SystemExit`` is not an ``Exception`` subclass, which is
    why the ``sys.exit(1)`` below still terminates the process.
    """
    try:
        if not (use_cuda == True):
            return
        if not (fluid.is_compiled_with_cuda() == False):
            return
        print(err)
        sys.exit(1)
    except Exception:
        pass


if __name__ == "__main__":

    check_cuda(True)

    check_cuda(False)

    check_cuda(True, "This is only for testing.")
# NOTE(review): the default message continues across a backslash-newline
# inside the string literal, so the indentation before "Please" is part of the
# message -- confirm the number of spaces against the upstream file.
def check_cuda(use_cuda, err = \
    "\nYou can not set use_cuda = True in the model because you are using paddlepaddle-cpu.\n \
    Please: 1. Install paddlepaddle-gpu to run your models on GPU or 2. Set use_cuda = False to run models on CPU.\n"
    ):
    """Stop the process if a GPU run is requested on a CPU-only Paddle build.

    Args:
        use_cuda: the probe is skipped unless this compares equal to ``True``.
        err: text printed right before ``sys.exit(1)``.

    Errors raised by the probe itself are ignored on purpose; only the
    ``SystemExit`` from ``sys.exit`` escapes, because it does not derive from
    ``Exception``.
    """
    try:
        wants_gpu = (use_cuda == True)
        if wants_gpu and fluid.is_compiled_with_cuda() == False:
            print(err)
            sys.exit(1)
    except Exception:
        pass


if __name__ == "__main__":

    check_cuda(True)

    check_cuda(False)

    check_cuda(True, "This is only for testing.")
if __name__ == "__main__":

    # Load the ADE configuration and echo it before doing any work.
    args = PDConfig(yaml_file="./data/config/ade.yaml")
    args.build()
    args.Print()

    # Every stage is optional; enabled stages run in this fixed order and
    # each flag is read only when its turn comes.
    for flag_name, run_stage in (
            ("do_train", do_train),
            ("do_predict", do_predict),
            ("do_eval", do_eval),
            ("do_save_inference_model", do_save_inference_model)):
        if getattr(args, flag_name):
            run_stage(args)

# vim: set ts=4 sw=4 sts=4 tw=100:
-d ${pretrain_model_path} ] 10 | then 11 | mkdir ${pretrain_model_path} 12 | fi 13 | 14 | python -u main.py \ 15 | --do_train=true \ 16 | --use_cuda=true \ 17 | --loss_type="CLS" \ 18 | --max_seq_len=50 \ 19 | --save_model_path="data/saved_models/matching_pretrained" \ 20 | --save_param="params" \ 21 | --training_file="data/input/data/unlabel_data/train.ids" \ 22 | --epoch=3 \ 23 | --print_step=1 \ 24 | --save_step=400 \ 25 | --batch_size=256 \ 26 | --hidden_size=256 \ 27 | --emb_size=256 \ 28 | --vocab_size=484016 \ 29 | --learning_rate=0.001 \ 30 | --sample_pro=0.1 \ 31 | --enable_ce="store_true" | python _ce.py 32 | 33 | 34 | export CUDA_VISIBLE_DEVICES=0,1,2,3 35 | 36 | python -u main.py \ 37 | --do_train=true \ 38 | --use_cuda=true \ 39 | --loss_type="CLS" \ 40 | --max_seq_len=50 \ 41 | --save_model_path="data/saved_models/matching_pretrained" \ 42 | --save_param="params" \ 43 | --training_file="data/input/data/unlabel_data/train.ids" \ 44 | --epoch=3 \ 45 | --print_step=1 \ 46 | --save_step=400 \ 47 | --batch_size=256 \ 48 | --hidden_size=256 \ 49 | --emb_size=256 \ 50 | --vocab_size=484016 \ 51 | --learning_rate=0.001 \ 52 | --sample_pro=0.1 \ 53 | --enable_ce="store_true" | python _ce.py 54 | 55 | -------------------------------------------------------------------------------- /DGU/dgu/scripts/run_build_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
def main(argv=None):
    """Build the training data for one DGU task and return an exit status.

    Args:
        argv: argument vector in ``sys.argv`` layout (program name first);
            defaults to ``sys.argv``.  ``argv[1]`` is the task name,
            compared case-insensitively.

    Returns:
        0 on success, 1 when the task name is missing or unsupported.
    """
    argv = sys.argv if argv is None else argv

    # A missing task name used to crash with IndexError on sys.argv[1].
    if len(argv) < 2:
        print("usage: python run_build_data.py [swda|mrda|atis|dstc2|udc]")
        return 1

    task_name = argv[1].lower()

    if task_name not in ['swda', 'mrda', 'atis', 'dstc2', 'udc']:
        print("task name error: we support [swda|mrda|atis|dstc2|udc]")
        return 1

    if task_name == 'swda':
        swda_inst = SWDA()
        swda_inst.main()
    elif task_name == 'mrda':
        mrda_inst = MRDA()
        mrda_inst.main()
    elif task_name == 'atis':
        atis_inst = ATIS()
        atis_inst.main()
        # Both ATIS sub-tasks reuse their test split as the dev split.
        shutil.copyfile("../../data/input/data/atis/atis_slot/test.txt", "../../data/input/data/atis/atis_slot/dev.txt")
        shutil.copyfile("../../data/input/data/atis/atis_intent/test.txt", "../../data/input/data/atis/atis_intent/dev.txt")
    elif task_name == 'dstc2':
        dstc_inst = DSTC2()
        dstc_inst.main()
    # 'udc' is accepted as a task name, but no builder is invoked for it here.

    return 0


if __name__ == "__main__":
    # sys.exit rather than the site-provided exit() helper, which is not
    # guaranteed to exist (e.g. under ``python -S``).
    sys.exit(main())
def create_net(
        is_training,
        model_input,
        num_labels,
        paradigm_inst,
        args):
    """Wire a BERT encoder into the task paradigm and return its outputs.

    Args:
        is_training: forwarded to the paradigm under ``'is_training'``.
        model_input: object exposing ``src_ids``, ``pos_ids``, ``sent_ids``,
            ``input_mask`` and ``labels``.
        num_labels: forwarded to the paradigm under ``'num_labels'``.
        paradigm_inst: object whose ``paradigm(bert, params)`` builds the
            task head.
        args: configuration; ``bert_config_path`` must be a ``str``.

    Returns:
        Whatever ``paradigm_inst.paradigm`` returns.
    """
    src_ids, pos_ids, sent_ids, input_mask, labels = (
        model_input.src_ids,
        model_input.pos_ids,
        model_input.sent_ids,
        model_input.input_mask,
        model_input.labels)

    assert isinstance(args.bert_config_path, str)

    # NOTE(review): fp16 is switched off unconditionally here -- confirm this
    # is intended given that the config also carries a ``use_fp16`` option.
    encoder = BertModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        input_mask=input_mask,
        config=JsonConfig(args.bert_config_path),
        use_fp16=False)

    paradigm_params = {
        'num_labels': num_labels,
        'src_ids': src_ids,
        'pos_ids': pos_ids,
        'sent_ids': sent_ids,
        'input_mask': input_mask,
        'labels': labels,
        'is_training': is_training,
    }

    return paradigm_inst.paradigm(encoder, paradigm_params)
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""common function"""
import sys
import io
import os


def get_file_list(dir_name):
    """
    Walk ``dir_name`` recursively and collect every regular file.

    Returns:
        (file_list, file_path): two parallel lists holding, for each file
        found, its bare name and its path joined with the directory that
        ``os.walk`` reported it in.
    """
    file_list = []
    file_path = []
    for root, _dirs, files in os.walk(dir_name):
        for name in files:
            file_list.append(name)
            file_path.append(os.path.join(root, name))
    return file_list, file_path


def get_dir_list(dir_name):
    """
    Return the paths of the immediate sub-directories of ``dir_name``.

    Entries are returned in ``os.listdir`` order; non-directories are skipped.
    """
    candidates = (os.path.join(dir_name, entry) for entry in os.listdir(dir_name))
    return [path for path in candidates if os.path.isdir(path)]


def _read_tab_pairs(conf):
    """Yield ``(first_field, second_field)`` for each non-blank line of ``conf``.

    The file is read as UTF-8 and closed as soon as iteration finishes or
    fails. Blank lines are skipped; a non-blank line without a tab still
    raises ``IndexError``, as before.
    """
    with io.open(conf, 'r', encoding="utf8") as fr:
        for line in fr:
            line = line.strip()
            if not line:
                # e.g. a trailing empty line at the end of the config
                continue
            elems = line.split('\t')
            yield elems[0], elems[1]


def load_dict(conf):
    """
    Load a tab-separated config file into a dict of lists.

    Each line's first field is the key; the second field is appended to that
    key's list, so repeated keys accumulate values in file order.
    """
    conf_dict = dict()
    for key, value in _read_tab_pairs(conf):
        conf_dict.setdefault(key, []).append(value)
    return conf_dict


def load_voc(conf):
    """
    Load a tab-separated mapping file into a flat dict.

    Each line's first field maps to its second field; when a key repeats,
    the last occurrence wins.
    """
    return {key: value for key, value in _read_tab_pairs(conf)}

# --------------------------------------------------------------------------------
# /ADE/ade/evaluate.py:
# --------------------------------------------------------------------------------
#
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Evaluation for auto dialogue evaluation"""

import sys
import numpy as np
import pandas as pd


def get_p_at_n_in_m(data, n, m, ind):
    """
    Tell whether the item at ``ind`` ranks in the top ``n`` of its window.

    The window is ``data[ind:ind + m]``; items are compared by their first
    element (the score). Returns 1 when the n-th highest score in the window
    is less than or equal to the score of ``data[ind]``, otherwise 0.
    """
    pos_score = data[ind][0]
    window = sorted(data[ind:ind + m], key=lambda x: x[0], reverse=True)
    return 1 if window[n - 1][0] <= pos_score else 0


def evaluate_Recall(data):
    """
    Compute R2@1, R10@1, R10@2 and R10@5 over groups of 10 candidates.

    ``data`` is a sequence of ``(score, label)`` items laid out in
    consecutive groups of 10 whose first item must carry label 1. Items
    beyond the last complete group are ignored.

    Returns:
        dict with keys '1_in_2', '1_in_10', '2_in_10', '5_in_10', each the
        fraction of groups in which the first item ranks within the top n.

    Raises:
        ValueError: if ``data`` holds fewer than 10 items (no complete
            group), which previously surfaced as a ZeroDivisionError.
        AssertionError: if a group's first item is not labelled 1.
    """
    length = len(data) // 10
    print('length=%s' % length)

    if length == 0:
        raise ValueError(
            'evaluate_Recall needs at least one complete group of 10 '
            'candidates, got %d item(s)' % len(data))

    # (n, m) pairs evaluated for every group, keyed by the reported name.
    settings = [('1_in_2', 1, 2), ('1_in_10', 1, 10),
                ('2_in_10', 2, 10), ('5_in_10', 5, 10)]
    hits = dict((name, 0.0) for name, _, _ in settings)

    for i in range(length):
        ind = i * 10
        assert data[ind][1] == 1, 'first candidate of each group must be positive'
        for name, n, m in settings:
            hits[name] += get_p_at_n_in_m(data, n, m, ind)

    recall_dict = dict((name, hits[name] / length) for name, _, _ in settings)
    return recall_dict


def evaluate_cor(pred, true):
    """
    Evaluate cor
    """
    df = pd.DataFrame({'pred': pred, 'true': true})
    cor_matrix = df.corr('spearman')
    # NOTE: this function's return statement is on the next line of the dump.
return cor_matrix['pred']['true'] 72 | -------------------------------------------------------------------------------- /DGU/.run_ce.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | train_atis_slot(){ 4 | if [ ! -d "./data/saved_models/atis_slot" ]; then 5 | mkdir "./data/saved_models/atis_slot" 6 | fi 7 | python -u train.py \ 8 | --task_name=atis_slot \ 9 | --use_cuda=true \ 10 | --do_train=true \ 11 | --in_tokens=false \ 12 | --epoch=2 \ 13 | --batch_size=32 \ 14 | --data_dir=./data/input/data/atis/atis_slot \ 15 | --bert_config_path=./data/pretrain_model/uncased_L-12_H-768_A-12/bert_config.json \ 16 | --vocab_path=./data/pretrain_model/uncased_L-12_H-768_A-12/vocab.txt \ 17 | --init_from_pretrain_model=./data/pretrain_model/uncased_L-12_H-768_A-12/params \ 18 | --save_model_path=./data/saved_models/atis_slot \ 19 | --save_param="params" \ 20 | --save_steps=100 \ 21 | --learning_rate=2e-5 \ 22 | --weight_decay=0.01 \ 23 | --max_seq_len=128 \ 24 | --print_steps=10 \ 25 | --use_fp16=false \ 26 | --enable_ce=store_true 27 | } 28 | 29 | train_mrda(){ 30 | if [ ! 
-d "./data/saved_models/mrda" ]; then 31 | mkdir "./data/saved_models/mrda" 32 | fi 33 | python -u train.py \ 34 | --task_name=mrda \ 35 | --use_cuda=true \ 36 | --do_train=true \ 37 | --in_tokens=true \ 38 | --epoch=2 \ 39 | --batch_size=4096 \ 40 | --data_dir=./data/input/data/mrda \ 41 | --bert_config_path=./data/pretrain_model/uncased_L-12_H-768_A-12/bert_config.json \ 42 | --vocab_path=./data/pretrain_model/uncased_L-12_H-768_A-12/vocab.txt \ 43 | --init_from_pretrain_model=./data/pretrain_model/uncased_L-12_H-768_A-12/params \ 44 | --save_model_path=./data/saved_models/mrda \ 45 | --save_param="params" \ 46 | --save_steps=500 \ 47 | --learning_rate=2e-5 \ 48 | --weight_decay=0.01 \ 49 | --max_seq_len=128 \ 50 | --print_steps=200 \ 51 | --use_fp16=false \ 52 | --enable_ce=store_true 53 | } 54 | 55 | # FIXME(zjl): this model would fail when GC is enabled, 56 | # but it seems that this error is from the model itself. 57 | # See issue here: https://github.com/PaddlePaddle/Paddle/issues/18994#event-2532039900 58 | # To fix ce, disable gc in this model temporarily. 59 | export FLAGS_eager_delete_tensor_gb=1 60 | 61 | cudaid=${multi:=0,1,2,3} 62 | export CUDA_VISIBLE_DEVICES=$cudaid 63 | train_atis_slot | python _ce.py 64 | sleep 20 65 | 66 | cudaid=${single:=0} 67 | export CUDA_VISIBLE_DEVICES=$cudaid 68 | train_atis_slot | python _ce.py 69 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 2 | 3 | # New record is achieved by ERNIE_English (2019/06/13) 4 | We got the new, best score of R_10 at 1 (**85.67%**) in the Ubuntu Corpus by incorporating ERNIE_English, an English pre-trained model from Baidu. 5 | Please refer to DMTK (the Dialogue Modeling ToolKit) for more details. 
6 | https://github.com/PaddlePaddle/models/tree/develop/PaddleNLP/PaddleDialogue 7 | 8 | 9 | # Baidu NLP Dialogue team 10 | The dialogue team, at Baidu NLP, is a group of engineers and researchers who truly trust in technology and work together to accelerate the development of open-domain dialogues. 11 | 12 | Our battlefields include, but are not limited to, the fundamental technology of neural dialogue systems (seq2seq generation or context-response matching), knowledge-driven dialogue, and life-long learning dialogue systems with reinforcement learning; we also provide system-level solutions for open-domain chatbots. 13 | 14 | Together we built the largest Chinese Human-Computer conversation system and support many businesses such as DuerOS, the largest Chatbot in China. Our life-long learning system interacts with hundreds of millions of Chinese users every day and learns through imitation/user-feedback, distilling knowledge from the conversation and learning to be smarter. 15 | 16 | We will release some source code of our previous work in the future, to make some small contribution to the whole community of human-computer conversation. 17 | 18 | # Publication 19 | > + [Proactive Human-Machine Conversation with Explicit Conversation Goals](https://arxiv.org/abs/1906.05572). *ACL 2019, Full Paper, poster* 20 | > + [Multi-Turn Response Selection for Chatbots with Deep Attention Matching Network](http://aclweb.org/anthology/P18-1103). *ACL 2018, Full Paper, oral* 21 | > + [Multi-View Response Selection for Human-Computer Conversation](http://www.aclweb.org/anthology/D16-1036). *EMNLP 2016, Full Paper, poster* 22 | > + [Shall I be Your Chat Companion towards an Online Human-Computer Conversation System](http://research.baidu.com/Public/uploads/5acc2a6723f1d.pdf). 
*CIKM 2016, Full Paper, oral* 23 | 24 | # Connected to our Chatbot Service 25 | Any Chinese developers can enable their own smart devices to talk with customers on open-domain topics by using our open chatbot service. Please find the usage manual at http://ai.baidu.com/forum/topic/show/497679 (in Chinese). 26 | 27 | -------------------------------------------------------------------------------- /ADE/_ce.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """this file is only used for continuous evaluation test!""" 15 | 16 | import os 17 | import sys 18 | sys.path.append(os.environ['ceroot']) 19 | from kpi import CostKpi 20 | from kpi import DurationKpi 21 | 22 | train_loss_card1 = CostKpi('train_loss_card1', 0.03, 0, actived=True) 23 | train_loss_card4 = CostKpi('train_loss_card4', 0.03, 0, actived=True) 24 | train_duration_card1 = DurationKpi('train_duration_card1', 0.01, 0, actived=True) 25 | train_duration_card4 = DurationKpi('train_duration_card4', 0.01, 0, actived=True) 26 | 27 | tracking_kpis = [ 28 | train_loss_card1, 29 | train_loss_card4, 30 | train_duration_card1, 31 | train_duration_card4, 32 | ] 33 | 34 | 35 | def parse_log(log): 36 | ''' 37 | This method should be implemented by model developers. 
38 | 39 | The suggestion: 40 | 41 | each line in the log should be key, value, for example: 42 | 43 | " 44 | train_cost\t1.0 45 | test_cost\t1.0 46 | train_cost\t1.0 47 | train_cost\t1.0 48 | train_acc\t1.2 49 | " 50 | ''' 51 | for line in log.split('\n'): 52 | fs = line.strip().split('\t') 53 | print(fs) 54 | if len(fs) == 3 and fs[0] == 'kpis': 55 | kpi_name = fs[1] 56 | kpi_value = float(fs[2]) 57 | yield kpi_name, kpi_value 58 | 59 | 60 | def log_to_ce(log): 61 | kpi_tracker = {} 62 | for kpi in tracking_kpis: 63 | kpi_tracker[kpi.name] = kpi 64 | 65 | for (kpi_name, kpi_value) in parse_log(log): 66 | print(kpi_name, kpi_value) 67 | kpi_tracker[kpi_name].add_record(kpi_value) 68 | kpi_tracker[kpi_name].persist() 69 | 70 | 71 | if __name__ == '__main__': 72 | log = sys.stdin.read() 73 | log_to_ce(log) 74 | -------------------------------------------------------------------------------- /ADE/ade/prepare_data_and_model.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 

import tarfile
import shutil
import urllib
import sys
import io
import os

# Python 2 exposes urlretrieve on `urllib`; Python 3 moved it to `urllib.request`.
URLLIB=urllib
if sys.version_info >= (3, 0):
    import urllib.request
    URLLIB=urllib.request

# Archive URL for each downloadable artifact.
DATA_MODEL_PATH = {"DATA_PATH": "https://baidu-nlp.bj.bcebos.com/auto_dialogue_evaluation_dataset-1.0.0.tar.gz",
                   "TRAINED_MODEL": "https://baidu-nlp.bj.bcebos.com/auto_dialogue_evaluation_models.2.0.0.tar.gz"}

# Directory each artifact is extracted into (same keys as DATA_MODEL_PATH).
PATH_MAP = {'DATA_PATH': "./data/input",
            'TRAINED_MODEL': './data/saved_models'}


def un_tar(tar_name, dir_name):
    """Extract the archive ``tar_name`` into ``dir_name``.

    Returns True on success. Any exception is printed and reported as False
    (deliberate best-effort behavior the caller relies on). The archive
    handle is closed in every case.
    """
    try:
        # NOTE(security): extractall trusts member paths inside the archive;
        # only use this on archives from a trusted source.
        with tarfile.open(tar_name) as t:
            t.extractall(path = dir_name)
        return True
    except Exception as e:
        print(e)
        return False


def download_model_and_data():
    """Download every archive in DATA_MODEL_PATH and unpack it per PATH_MAP.

    Previously extracted directories are removed first. Each archive is
    downloaded into the current directory, extracted, then deleted.

    Returns:
        True when every archive was extracted; False as soon as one
        extraction fails (that archive is left on disk).
    """
    print("Downloading ade data, pretrain model and trained models......")
    print("This process is quite long, please wait patiently............")
    for path in ['./data/input/data', './data/saved_models/trained_models']:
        if os.path.exists(path):
            shutil.rmtree(path)
    for path_key in DATA_MODEL_PATH:
        url = DATA_MODEL_PATH[path_key]
        filename = os.path.basename(url)
        URLLIB.urlretrieve(url, os.path.join("./", filename))
        state = un_tar(filename, PATH_MAP[path_key])
        if not state:
            print("Tar %s error....." % path_key)
            return False
        os.remove(filename)
    return True


if __name__ == "__main__":
    state = download_model_and_data()
    if not state:
        # sys.exit instead of the `site`-provided interactive helper `exit`.
        sys.exit(1)
    print("Downloading data and models sucess......")

# --------------------------------------------------------------------------------
# /ADE/eval.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""evaluation metrics"""

import io
import os
import sys
import numpy as np

import ade.evaluate as evaluate
from ade.utils.configure import PDConfig


def do_eval(args):
    """Print evaluation metrics for a prediction file against its labels.

    Reads the integer label from the third tab-separated field of each line
    of ``args.evaluation_file`` and the bracketed, whitespace-separated
    score vector from the second field of each line of
    ``args.output_prediction_file``. Depending on ``args.loss_type`` it
    prints either the recall metrics ('CLS') or the Spearman correlation
    ('L2'), together with the mean score.

    Raises:
        AssertionError: if a line does not have the expected field count.
        ValueError: if ``args.loss_type`` is neither 'CLS' nor 'L2'.
    """
    labels = []
    # Both inputs are opened with `with` so the handles are always closed.
    with io.open(args.evaluation_file, 'r', encoding="utf8") as fr:
        for line in fr:
            tokens = line.strip().split('\t')
            assert len(tokens) == 3
            labels.append(int(tokens[2]))

    scores = []
    with io.open(args.output_prediction_file, 'r', encoding="utf8") as fr:
        for line in fr:
            tokens = line.strip().split('\t')
            assert len(tokens) == 2
            # The score field looks like "[0.12 0.88]": drop the brackets
            # and parse the remaining numbers.
            score = np.array(tokens[1].strip("[]").split())
            scores.append(score.astype(np.float64))

    if args.loss_type == 'CLS':
        recall_dict = evaluate.evaluate_Recall(list(zip(scores, labels)))
        mean_score = sum(scores) / len(scores)
        print('mean score: %.6f' % mean_score)
        print('evaluation recall result:')
        print('1_in_2: %.6f\t1_in_10: %.6f\t2_in_10: %.6f\t5_in_10: %.6f' %
              (recall_dict['1_in_2'], recall_dict['1_in_10'],
               recall_dict['2_in_10'], recall_dict['5_in_10']))
    elif args.loss_type == 'L2':
        # Only the first component of each score vector is used here.
        scores = [x[0] for x in scores]
        mean_score = sum(scores) / len(scores)
        cor = evaluate.evaluate_cor(scores, labels)
        print('mean score: %.6f\nevaluation cor results:%.6f' %
              (mean_score, cor))
    else:
        raise ValueError(
            "unsupported loss_type %r, expected 'CLS' or 'L2'" % (args.loss_type,))


if __name__ == "__main__":
    args = PDConfig(yaml_file="./data/config/ade.yaml")
    args.build()

    do_eval(args)

# --------------------------------------------------------------------------------
# /DGU/dgu/prepare_data_and_model.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import tarfile
import shutil
import urllib
import sys
import io
import os


# Python 2 exposes urlretrieve on `urllib`; Python 3 moved it to `urllib.request`.
URLLIB=urllib
if sys.version_info >= (3, 0):
    import urllib.request
    URLLIB=urllib.request

# Archive URL for each downloadable artifact.
DATA_MODEL_PATH = {"DATA_PATH": "https://baidu-nlp.bj.bcebos.com/dmtk_data_1.0.0.tar.gz",
                   "PRETRAIN_MODEL": "https://bert-models.bj.bcebos.com/uncased_L-12_H-768_A-12.tar.gz",
                   "TRAINED_MODEL": "https://baidu-nlp.bj.bcebos.com/dgu_models_2.0.0.tar.gz"}

# Directory each artifact is extracted into (same keys as DATA_MODEL_PATH).
PATH_MAP = {'DATA_PATH': "./data/input",
            'PRETRAIN_MODEL': './data/pretrain_model',
            'TRAINED_MODEL': './data/saved_models'}


def un_tar(tar_name, dir_name):
    """Extract the archive ``tar_name`` into ``dir_name``.

    Returns True on success. Any exception is printed and reported as False
    (deliberate best-effort behavior the caller relies on). The archive
    handle is closed in every case.
    """
    try:
        # NOTE(security): extractall trusts member paths inside the archive;
        # only use this on archives from a trusted source.
        with tarfile.open(tar_name) as t:
            t.extractall(path = dir_name)
        return True
    except Exception as e:
        print(e)
        return False


def download_model_and_data():
    """Download every archive in DATA_MODEL_PATH and unpack it per PATH_MAP.

    Previously extracted directories are removed first. Each archive is
    downloaded into the current directory, extracted, then deleted.

    Returns:
        True when every archive was extracted; False as soon as one
        extraction fails (that archive is left on disk).
    """
    print("Downloading dgu data, pretrain model and trained models......")
    print("This process is quite long, please wait patiently............")
    for path in ['./data/input/data', './data/pretrain_model/uncased_L-12_H-768_A-12', './data/saved_models/trained_models']:
        if os.path.exists(path):
            shutil.rmtree(path)
    for path_key in DATA_MODEL_PATH:
        url = DATA_MODEL_PATH[path_key]
        filename = os.path.basename(url)
        URLLIB.urlretrieve(url, os.path.join("./", filename))
        state = un_tar(filename, PATH_MAP[path_key])
        if not state:
            print("Tar %s error....." % path_key)
            return False
        os.remove(filename)
    return True


if __name__ == "__main__":
    state = download_model_and_data()
    if not state:
        # sys.exit instead of the `site`-provided interactive helper `exit`.
        sys.exit(1)
    print("Downloading data and models sucess......")

# --------------------------------------------------------------------------------
# /DGU/dgu/define_predict_pack.py:
# --------------------------------------------------------------------------------
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""define prediction results"""

import re
import sys
import numpy as np

import paddle
import paddle.fluid as fluid


class DefinePredict(object):
    """
    Turn raw model outputs into the printable result for each task.

    ``task_map`` maps a task name to the name of the method on this class
    that formats that task's output.
    """
    def __init__(self):
        """
        Build the task-name -> formatter-method-name table.
        """
        self.task_map = dict([
            ('udc', 'get_matching_res'),
            ('swda', 'get_cls_res'),
            ('mrda', 'get_cls_res'),
            ('atis_intent', 'get_cls_res'),
            ('atis_slot', 'get_sequence_tagging'),
            ('dstc2', 'get_multi_cls_res'),
            ('dstc2_asr', 'get_multi_cls_res'),
            ('multi-woz', 'get_multi_cls_res'),
        ])

    def get_matching_res(self, probs, params=None):
        """
        Matching score: the element at index 1 of ``probs``.
        """
        return list(probs)[1]

    def get_cls_res(self, probs, params=None):
        """
        Classification tag: index of the first maximum in ``probs``.
        """
        scores = list(probs)
        return scores.index(max(scores))

    def get_sequence_tagging(self, probs, params=None):
        """
        Sequence-tagging result: one space-joined string of tags per row.

        ``probs`` is reshaped to rows of width ``params``.
        """
        rows = np.array(probs).reshape(-1, params)
        return [" ".join(str(tag) for tag in list(row)) for row in rows]

    def get_multi_cls_res(self, probs, params=None):
        """
        Multi-label result: space-joined indices whose value is >= 0.5.

        When no value reaches 0.5, the index of the first maximum is
        returned instead (as a string).
        """
        scores = list(probs)
        # Indices are collected in ascending order, so no further sort is needed.
        chosen = [idx for idx, value in enumerate(scores) if value >= 0.5]
        if chosen:
            return " ".join(str(idx) for idx in chosen)
        return str(scores.index(max(scores)))

# --------------------------------------------------------------------------------
# /DGU/_ce.py:
# --------------------------------------------------------------------------------
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """this file is only used for continuous evaluation test!""" 15 | 16 | import os 17 | import sys 18 | sys.path.append(os.environ['ceroot']) 19 | from kpi import CostKpi 20 | from kpi import DurationKpi 21 | from kpi import AccKpi 22 | 23 | each_step_duration_atis_slot_card1 = DurationKpi('each_step_duration_atis_slot_card1', 0.01, 0, actived=True) 24 | train_loss_atis_slot_card1 = CostKpi('train_loss_atis_slot_card1', 0.08, 0, actived=True) 25 | train_acc_atis_slot_card1 = CostKpi('train_acc_atis_slot_card1', 0.01, 0, actived=True) 26 | each_step_duration_atis_slot_card4 = DurationKpi('each_step_duration_atis_slot_card4', 0.06, 0, actived=True) 27 | train_loss_atis_slot_card4 = CostKpi('train_loss_atis_slot_card4', 0.03, 0, actived=True) 28 | train_acc_atis_slot_card4 = CostKpi('train_acc_atis_slot_card4', 0.01, 0, actived=True) 29 | 30 | tracking_kpis = [ 31 | each_step_duration_atis_slot_card1, 32 | train_loss_atis_slot_card1, 33 | train_acc_atis_slot_card1, 34 | each_step_duration_atis_slot_card4, 35 | train_loss_atis_slot_card4, 36 | train_acc_atis_slot_card4, 37 | ] 38 | 39 | 40 | def parse_log(log): 41 | ''' 42 | This method should be implemented by model developers. 
43 | 44 | The suggestion: 45 | 46 | each line in the log should be key, value, for example: 47 | 48 | " 49 | train_cost\t1.0 50 | test_cost\t1.0 51 | train_cost\t1.0 52 | train_cost\t1.0 53 | train_acc\t1.2 54 | " 55 | ''' 56 | for line in log.split('\n'): 57 | fs = line.strip().split('\t') 58 | print(fs) 59 | if len(fs) == 3 and fs[0] == 'kpis': 60 | kpi_name = fs[1] 61 | kpi_value = float(fs[2]) 62 | yield kpi_name, kpi_value 63 | 64 | 65 | def log_to_ce(log): 66 | kpi_tracker = {} 67 | for kpi in tracking_kpis: 68 | kpi_tracker[kpi.name] = kpi 69 | 70 | for (kpi_name, kpi_value) in parse_log(log): 71 | print(kpi_name, kpi_value) 72 | kpi_tracker[kpi_name].add_record(kpi_value) 73 | kpi_tracker[kpi_name].persist() 74 | 75 | 76 | if __name__ == '__main__': 77 | log = sys.stdin.read() 78 | log_to_ce(log) 79 | -------------------------------------------------------------------------------- /DAM/utils/douban_evaluation.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | from sklearn.metrics import average_precision_score 4 | 5 | def mean_average_precision(sort_data): 6 | #to do 7 | count_1 = 0 8 | sum_precision = 0 9 | for index in range(len(sort_data)): 10 | if sort_data[index][1] == 1: 11 | count_1 += 1 12 | sum_precision += 1.0 * count_1 / (index+1) 13 | return sum_precision / count_1 14 | 15 | def mean_reciprocal_rank(sort_data): 16 | sort_lable = [s_d[1] for s_d in sort_data] 17 | assert 1 in sort_lable 18 | return 1.0 / (1 + sort_lable.index(1)) 19 | 20 | def precision_at_position_1(sort_data): 21 | if sort_data[0][1] == 1: 22 | return 1 23 | else: 24 | return 0 25 | 26 | def recall_at_position_k_in_10(sort_data, k): 27 | sort_lable = [s_d[1] for s_d in sort_data] 28 | select_lable = sort_lable[:k] 29 | return 1.0 * select_lable.count(1) / sort_lable.count(1) 30 | 31 | def evaluation_one_session(data): 32 | sort_data = sorted(data, key=lambda x: x[0], reverse=True) 33 | m_a_p = 
mean_average_precision(sort_data) 34 | m_r_r = mean_reciprocal_rank(sort_data) 35 | p_1 = precision_at_position_1(sort_data) 36 | r_1 = recall_at_position_k_in_10(sort_data, 1) 37 | r_2 = recall_at_position_k_in_10(sort_data, 2) 38 | r_5 = recall_at_position_k_in_10(sort_data, 5) 39 | return m_a_p, m_r_r, p_1, r_1, r_2, r_5 40 | 41 | def evaluate(file_path): 42 | sum_m_a_p = 0 43 | sum_m_r_r = 0 44 | sum_p_1 = 0 45 | sum_r_1 = 0 46 | sum_r_2 = 0 47 | sum_r_5 = 0 48 | 49 | i = 0 50 | total_num = 0 51 | with open(file_path, 'r') as infile: 52 | for line in infile: 53 | if i % 10 == 0: 54 | data = [] 55 | 56 | tokens = line.strip().split('\t') 57 | data.append((float(tokens[0]), int(tokens[1]))) 58 | 59 | if i % 10 == 9: 60 | total_num += 1 61 | m_a_p, m_r_r, p_1, r_1, r_2, r_5 = evaluation_one_session(data) 62 | sum_m_a_p += m_a_p 63 | sum_m_r_r += m_r_r 64 | sum_p_1 += p_1 65 | sum_r_1 += r_1 66 | sum_r_2 += r_2 67 | sum_r_5 += r_5 68 | 69 | i += 1 70 | 71 | #print('total num: %s' %total_num) 72 | #print('MAP: %s' %(1.0*sum_m_a_p/total_num)) 73 | #print('MRR: %s' %(1.0*sum_m_r_r/total_num)) 74 | #print('P@1: %s' %(1.0*sum_p_1/total_num)) 75 | return (1.0*sum_m_a_p/total_num, 1.0*sum_m_r_r/total_num, 1.0*sum_p_1/total_num, 76 | 1.0*sum_r_1/total_num, 1.0*sum_r_2/total_num, 1.0*sum_r_5/total_num) 77 | 78 | if __name__ == '__main__': 79 | result = evaluate(sys.argv[1]) 80 | for r in result: 81 | print(r) 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /DAM/bin/test_and_evaluate.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import time 4 | 5 | import cPickle as pickle 6 | import tensorflow as tf 7 | import numpy as np 8 | 9 | import utils.reader as reader 10 | import utils.evaluation as eva 11 | 12 | 13 | def test(conf, _model): 14 | 15 | if not os.path.exists(conf['save_path']): 16 | os.makedirs(conf['save_path']) 17 | 18 
| # load data 19 | print('starting loading data') 20 | print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))) 21 | train_data, val_data, test_data = pickle.load(open(conf["data_path"], 'rb')) 22 | print('finish loading data') 23 | 24 | test_batches = reader.build_batches(test_data, conf) 25 | 26 | print("finish building test batches") 27 | print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))) 28 | 29 | # refine conf 30 | test_batch_num = len(test_batches["response"]) 31 | 32 | print('configurations: %s' %conf) 33 | 34 | 35 | _graph = _model.build_graph() 36 | print('build graph sucess') 37 | print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))) 38 | 39 | with tf.Session(graph=_graph) as sess: 40 | #_model.init.run(); 41 | _model.saver.restore(sess, conf["init_model"]) 42 | print("sucess init %s" %conf["init_model"]) 43 | 44 | batch_index = 0 45 | step = 0 46 | 47 | score_file_path = conf['save_path'] + 'score.test' 48 | score_file = open(score_file_path, 'w') 49 | 50 | print('starting test') 51 | print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))) 52 | for batch_index in xrange(test_batch_num): 53 | 54 | feed = { 55 | _model.turns: test_batches["turns"][batch_index], 56 | _model.tt_turns_len: test_batches["tt_turns_len"][batch_index], 57 | _model.every_turn_len: test_batches["every_turn_len"][batch_index], 58 | _model.response: test_batches["response"][batch_index], 59 | _model.response_len: test_batches["response_len"][batch_index], 60 | _model.label: test_batches["label"][batch_index] 61 | } 62 | 63 | scores = sess.run(_model.logits, feed_dict = feed) 64 | 65 | for i in xrange(conf["batch_size"]): 66 | score_file.write( 67 | str(scores[i]) + '\t' + 68 | str(test_batches["label"][batch_index][i]) + '\n') 69 | #str(sum(test_batches["every_turn_len"][batch_index][i]) / test_batches['tt_turns_len'][batch_index][i]) + '\t' + 70 | #str(test_batches['tt_turns_len'][batch_index][i]) + '\n') 71 | 72 | 
score_file.close() 73 | print('finish test') 74 | print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))) 75 | 76 | 77 | #write evaluation result 78 | result = eva.evaluate(score_file_path) 79 | result_file_path = conf["save_path"] + "result.test" 80 | with open(result_file_path, 'w') as out_file: 81 | for p_at in result: 82 | out_file.write(str(p_at) + '\n') 83 | print('finish evaluation') 84 | print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))) 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /DAM/README.md: -------------------------------------------------------------------------------- 1 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 2 | 3 | # __New Record__ 4 | 5 | We got the new, best score of R_10 at 1 (85.67%) in the Ubuntu Corpus by incorporating ERNIE_English, an English pre-trained model from Baidu. Please refer to DMTK (the Dialogue Modeling ToolKit) for more details. https://github.com/PaddlePaddle/models/tree/develop/PaddleNLP/dialogue_model_toolkit 6 | 7 | 8 | 9 | # __Deep Attention Matching Network__ 10 | 11 | This is the source code of Deep Attention Matching network (DAM), that is proposed for multi-turn response selection in the retrieval-based chatbot. 12 | 13 | DAM is a neural matching network that entirely based on attention mechanism. The motivation of DAM is to capture those semantic dependencies, among dialogue elements at different level of granularities, in multi-turn conversation as matching evidences, in order to better match response candidate with its multi-turn context. DAM will appear on ACL-2018, please find our paper at: http://acl2018.org/conference/accepted-papers/. 14 | 15 | ## __Paddle Version__ 16 | 17 | DAM is originally implemented with Tensorflow, we highly recommend using the paddle version as Paddle supports parallely training with very large corpus. 
18 | 19 | You can find the paddle version at: https://github.com/PaddlePaddle/models/tree/develop/fluid . 20 | 21 | ## __Network__ 22 | 23 | DAM is inspired by Transformer in Machine Translation (Vaswani et al., 2017), and we extend the key attention mechanism of Transformer in two perspectives and introduce those two kinds of attention in one uniform neural network. 24 | 25 | - **self-attention** To gradually capture semantic representations in different granularities by stacking attention from word-level embeddings. Those multi-grained semantic representations would facilitate exploring segmental dependencies between context and response. 26 | 27 | - **cross-attention** Attention across context and response can generally capture the relevance in dependency between segment pairs, which could provide complementary information to textual relevance for matching response with multi-turn context. 28 | 29 |
30 | 31 |
32 | 33 | ## __Results__ 34 | 35 | We test DAM on two large-scale multi-turn response selection tasks, i.e., the Ubuntu Corpus v1 and the Douban Conversation Corpus. Experimental results are below: 36 | 37 | 38 | 39 | ## __Usage__ 40 | 41 | First, please download [data](https://pan.baidu.com/s/1hakfuuwdS8xl7NyxlWzRiQ "data") and unzip it: 42 | ``` 43 | cd data 44 | unzip data.zip 45 | ``` 46 | 47 | If you want to use well-trained models directly, please download [models](https://pan.baidu.com/s/1pl4d63MBxihgrEWWfdAz0w "models") and unzip it: 48 | ``` 49 | cd output 50 | unzip output.zip 51 | ``` 52 | 53 | Train and test the model by: 54 | ``` 55 | sh run.sh 56 | ``` 57 | 58 | ## __Dependencies__ 59 | 60 | - Python >= 2.7.3 61 | - Tensorflow == 1.2.1 62 | 63 | ## __Citation__ 64 | 65 | The following article describes DAM in detail. We recommend citing this article by default. 66 | 67 | ``` 68 | @inproceedings{ , 69 | title={Multi-Turn Response Selection for Chatbots with Deep Attention Matching Network}, 70 | author={Xiangyang Zhou, Lu Li, Daxiang Dong, Yi Liu, Ying Chen, Wayne Xin Zhao, Dianhai Yu and Hua Wu}, 71 | booktitle={Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, 72 | volume={1}, 73 | pages={ -- }, 74 | year={2018} 75 | } 76 | ``` 77 | 78 | 79 | -------------------------------------------------------------------------------- /ADE/inference_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """save inference model for auto dialogue evaluation""" 15 | 16 | import os 17 | import sys 18 | import six 19 | import numpy as np 20 | import time 21 | import paddle 22 | import paddle.fluid as fluid 23 | 24 | import ade.reader as reader 25 | from ade_net import create_net 26 | 27 | from ade.utils.configure import PDConfig 28 | from ade.utils.input_field import InputField 29 | from ade.utils.model_check import check_cuda 30 | import ade.utils.save_load_io as save_load_io 31 | 32 | 33 | def do_save_inference_model(args): 34 | 35 | test_prog = fluid.default_main_program() 36 | startup_prog = fluid.default_startup_program() 37 | 38 | with fluid.program_guard(test_prog, startup_prog): 39 | test_prog.random_seed = args.random_seed 40 | startup_prog.random_seed = args.random_seed 41 | 42 | with fluid.unique_name.guard(): 43 | 44 | context_wordseq = fluid.data( 45 | name='context_wordseq', shape=[-1, 1], dtype='int64', lod_level=1) 46 | response_wordseq = fluid.data( 47 | name='response_wordseq', shape=[-1, 1], dtype='int64', lod_level=1) 48 | labels = fluid.data( 49 | name='labels', shape=[-1, 1], dtype='int64') 50 | 51 | input_inst = [context_wordseq, response_wordseq, labels] 52 | input_field = InputField(input_inst) 53 | data_reader = fluid.io.PyReader(feed_list=input_inst, 54 | capacity=4, iterable=False) 55 | 56 | logits = create_net( 57 | is_training=False, 58 | model_input=input_field, 59 | args=args 60 | ) 61 | 62 | if args.use_cuda: 63 | place = fluid.CUDAPlace(0) 64 | else: 65 | place = fluid.CPUPlace() 66 | 67 | 
exe = fluid.Executor(place) 68 | exe.run(startup_prog) 69 | 70 | assert (args.init_from_params) or (args.init_from_pretrain_model) 71 | 72 | if args.init_from_params: 73 | save_load_io.init_from_params(args, exe, test_prog) 74 | elif args.init_from_pretrain_model: 75 | save_load_io.init_from_pretrain_model(args, exe, test_prog) 76 | 77 | # saving inference model 78 | fluid.io.save_inference_model( 79 | args.inference_model_dir, 80 | feeded_var_names=[ 81 | input_field.context_wordseq.name, 82 | input_field.response_wordseq.name, 83 | ], 84 | target_vars=[ 85 | logits, 86 | ], 87 | executor=exe, 88 | main_program=test_prog, 89 | model_filename="model.pdmodel", 90 | params_filename="params.pdparams") 91 | 92 | print("save inference model at %s" % (args.inference_model_dir)) 93 | 94 | 95 | if __name__ == "__main__": 96 | args = PDConfig(yaml_file="./data/config/ade.yaml") 97 | args.build() 98 | 99 | check_cuda(args.use_cuda) 100 | 101 | do_save_inference_model(args) 102 | -------------------------------------------------------------------------------- /DGU/dgu/utils/fp16.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from __future__ import print_function 16 | import paddle 17 | import paddle.fluid as fluid 18 | 19 | 20 | def cast_fp16_to_fp32(i, o, prog): 21 | prog.global_block().append_op( 22 | type="cast", 23 | inputs={"X": i}, 24 | outputs={"Out": o}, 25 | attrs={ 26 | "in_dtype": fluid.core.VarDesc.VarType.FP16, 27 | "out_dtype": fluid.core.VarDesc.VarType.FP32 28 | }) 29 | 30 | 31 | def cast_fp32_to_fp16(i, o, prog): 32 | prog.global_block().append_op( 33 | type="cast", 34 | inputs={"X": i}, 35 | outputs={"Out": o}, 36 | attrs={ 37 | "in_dtype": fluid.core.VarDesc.VarType.FP32, 38 | "out_dtype": fluid.core.VarDesc.VarType.FP16 39 | }) 40 | 41 | 42 | def copy_to_master_param(p, block): 43 | v = block.vars.get(p.name, None) 44 | if v is None: 45 | raise ValueError("no param name %s found!" % p.name) 46 | new_p = fluid.framework.Parameter( 47 | block=block, 48 | shape=v.shape, 49 | dtype=fluid.core.VarDesc.VarType.FP32, 50 | type=v.type, 51 | lod_level=v.lod_level, 52 | stop_gradient=p.stop_gradient, 53 | trainable=p.trainable, 54 | optimize_attr=p.optimize_attr, 55 | regularizer=p.regularizer, 56 | gradient_clip_attr=p.gradient_clip_attr, 57 | error_clip=p.error_clip, 58 | name=v.name + ".master") 59 | return new_p 60 | 61 | 62 | def create_master_params_grads(params_grads, main_prog, startup_prog, 63 | loss_scaling): 64 | master_params_grads = [] 65 | tmp_role = main_prog._current_role 66 | OpRole = fluid.core.op_proto_and_checker_maker.OpRole 67 | main_prog._current_role = OpRole.Backward 68 | for p, g in params_grads: 69 | # create master parameters 70 | master_param = copy_to_master_param(p, main_prog.global_block()) 71 | startup_master_param = startup_prog.global_block()._clone_variable( 72 | master_param) 73 | startup_p = startup_prog.global_block().var(p.name) 74 | cast_fp16_to_fp32(startup_p, startup_master_param, startup_prog) 75 | # cast fp16 gradients to fp32 before apply gradients 76 | if g.name.find("layer_norm") > -1: 77 | if loss_scaling > 1: 78 | 
scaled_g = g / float(loss_scaling) 79 | else: 80 | scaled_g = g 81 | master_params_grads.append([p, scaled_g]) 82 | continue 83 | master_grad = fluid.layers.cast(g, "float32") 84 | if loss_scaling > 1: 85 | master_grad = master_grad / float(loss_scaling) 86 | master_params_grads.append([master_param, master_grad]) 87 | main_prog._current_role = tmp_role 88 | return master_params_grads 89 | 90 | 91 | def master_param_to_train_param(master_params_grads, params_grads, main_prog): 92 | for idx, m_p_g in enumerate(master_params_grads): 93 | train_p, _ = params_grads[idx] 94 | if train_p.name.find("layer_norm") > -1: 95 | continue 96 | with main_prog._optimized_guard([m_p_g[0], m_p_g[1]]): 97 | cast_fp32_to_fp16(m_p_g[0], train_p, main_prog) 98 | -------------------------------------------------------------------------------- /ADE/ade/utils/save_load_io.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """save or load model api""" 15 | 16 | import os 17 | import sys 18 | 19 | import paddle 20 | import paddle.fluid as fluid 21 | 22 | 23 | def init_from_pretrain_model(args, exe, program): 24 | 25 | assert isinstance(args.init_from_pretrain_model, str) 26 | 27 | if not os.path.exists(args.init_from_pretrain_model): 28 | raise Warning("The pretrained params do not exist.") 29 | return False 30 | 31 | def existed_params(var): 32 | if not isinstance(var, fluid.framework.Parameter): 33 | return False 34 | return os.path.exists( 35 | os.path.join(args.init_from_pretrain_model, var.name)) 36 | 37 | fluid.io.load_vars( 38 | exe, 39 | args.init_from_pretrain_model, 40 | main_program=program, 41 | predicate=existed_params) 42 | 43 | print("finish initing model from pretrained params from %s" % 44 | (args.init_from_pretrain_model)) 45 | 46 | return True 47 | 48 | 49 | def init_from_checkpoint(args, exe, program): 50 | 51 | assert isinstance(args.init_from_checkpoint, str) 52 | 53 | if not os.path.exists(args.init_from_checkpoint): 54 | raise Warning("the checkpoint path does not exist.") 55 | return False 56 | 57 | fluid.io.load_persistables( 58 | executor=exe, 59 | dirname=args.init_from_checkpoint, 60 | main_program=program, 61 | filename="checkpoint.pdckpt") 62 | 63 | print("finish initing model from checkpoint from %s" % 64 | (args.init_from_checkpoint)) 65 | 66 | return True 67 | 68 | 69 | def init_from_params(args, exe, program): 70 | 71 | assert isinstance(args.init_from_params, str) 72 | 73 | if not os.path.exists(args.init_from_params): 74 | raise Warning("the params path does not exist.") 75 | return False 76 | 77 | fluid.io.load_params( 78 | executor=exe, 79 | dirname=args.init_from_params, 80 | main_program=program, 81 | filename="params.pdparams") 82 | 83 | print("finish init model from params from %s" % (args.init_from_params)) 84 | 85 | return True 86 | 87 | 88 | def save_checkpoint(args, exe, program, dirname): 89 | 90 | assert 
isinstance(args.save_model_path, str) 91 | 92 | checkpoint_dir = os.path.join(args.save_model_path, args.save_checkpoint) 93 | 94 | if not os.path.exists(checkpoint_dir): 95 | os.mkdir(checkpoint_dir) 96 | 97 | fluid.io.save_persistables( 98 | exe, 99 | os.path.join(checkpoint_dir, dirname), 100 | main_program=program, 101 | filename="checkpoint.pdckpt") 102 | 103 | print("save checkpoint at %s" % (os.path.join(checkpoint_dir, dirname))) 104 | 105 | return True 106 | 107 | 108 | def save_param(args, exe, program, dirname): 109 | 110 | assert isinstance(args.save_model_path, str) 111 | 112 | param_dir = os.path.join(args.save_model_path, args.save_param) 113 | 114 | if not os.path.exists(param_dir): 115 | os.makedirs(param_dir) 116 | 117 | fluid.io.save_params( 118 | exe, 119 | os.path.join(param_dir, dirname), 120 | main_program=program, 121 | filename="params.pdparams") 122 | print("save parameters at %s" % (os.path.join(param_dir, dirname))) 123 | 124 | return True 125 | 126 | 127 | -------------------------------------------------------------------------------- /DGU/dgu/utils/save_load_io.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """save or load model api""" 15 | 16 | import os 17 | import sys 18 | 19 | import paddle 20 | import paddle.fluid as fluid 21 | 22 | 23 | def init_from_pretrain_model(args, exe, program): 24 | 25 | assert isinstance(args.init_from_pretrain_model, str) 26 | 27 | if not os.path.exists(args.init_from_pretrain_model): 28 | raise Warning("The pretrained params do not exist.") 29 | return False 30 | 31 | def existed_params(var): 32 | if not isinstance(var, fluid.framework.Parameter): 33 | return False 34 | return os.path.exists( 35 | os.path.join(args.init_from_pretrain_model, var.name)) 36 | 37 | fluid.io.load_vars( 38 | exe, 39 | args.init_from_pretrain_model, 40 | main_program=program, 41 | predicate=existed_params) 42 | 43 | print("finish initing model from pretrained params from %s" % 44 | (args.init_from_pretrain_model)) 45 | 46 | return True 47 | 48 | 49 | def init_from_checkpoint(args, exe, program): 50 | 51 | assert isinstance(args.init_from_checkpoint, str) 52 | 53 | if not os.path.exists(args.init_from_checkpoint): 54 | raise Warning("the checkpoint path does not exist.") 55 | return False 56 | 57 | fluid.io.load_persistables( 58 | executor=exe, 59 | dirname=args.init_from_checkpoint, 60 | main_program=program, 61 | filename="checkpoint.pdckpt") 62 | 63 | print("finish initing model from checkpoint from %s" % 64 | (args.init_from_checkpoint)) 65 | 66 | return True 67 | 68 | 69 | def init_from_params(args, exe, program): 70 | 71 | assert isinstance(args.init_from_params, str) 72 | 73 | if not os.path.exists(args.init_from_params): 74 | raise Warning("the params path does not exist.") 75 | return False 76 | 77 | fluid.io.load_params( 78 | executor=exe, 79 | dirname=args.init_from_params, 80 | main_program=program, 81 | filename="params.pdparams") 82 | 83 | print("finish init model from params from %s" % (args.init_from_params)) 84 | 85 | return True 86 | 87 | 88 | def save_checkpoint(args, exe, program, dirname): 89 | 90 | assert 
isinstance(args.save_model_path, str) 91 | 92 | checkpoint_dir = os.path.join(args.save_model_path, args.save_checkpoint) 93 | 94 | if not os.path.exists(checkpoint_dir): 95 | os.mkdir(checkpoint_dir) 96 | 97 | fluid.io.save_persistables( 98 | exe, 99 | os.path.join(checkpoint_dir, dirname), 100 | main_program=program, 101 | filename="checkpoint.pdckpt") 102 | 103 | print("save checkpoint at %s" % (os.path.join(checkpoint_dir, dirname))) 104 | 105 | return True 106 | 107 | 108 | def save_param(args, exe, program, dirname): 109 | 110 | assert isinstance(args.save_model_path, str) 111 | 112 | param_dir = os.path.join(args.save_model_path, args.save_param) 113 | 114 | if not os.path.exists(param_dir): 115 | os.makedirs(param_dir) 116 | 117 | fluid.io.save_params( 118 | exe, 119 | os.path.join(param_dir, dirname), 120 | main_program=program, 121 | filename="params.pdparams") 122 | print("save parameters at %s" % (os.path.join(param_dir, dirname))) 123 | 124 | return True 125 | 126 | 127 | -------------------------------------------------------------------------------- /ADE/ade_net.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """Network for auto dialogue evaluation""" 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import paddle 21 | import paddle.fluid as fluid 22 | 23 | 24 | def create_net( 25 | is_training, 26 | model_input, 27 | args, 28 | clip_value=10.0, 29 | word_emb_name="shared_word_emb", 30 | lstm_W_name="shared_lstm_W", 31 | lstm_bias_name="shared_lstm_bias"): 32 | 33 | context_wordseq = model_input.context_wordseq 34 | response_wordseq = model_input.response_wordseq 35 | label = model_input.labels 36 | 37 | #emb 38 | context_emb = fluid.input.embedding( 39 | input=context_wordseq, 40 | size=[args.vocab_size, args.emb_size], 41 | is_sparse=True, 42 | param_attr=fluid.ParamAttr( 43 | name=word_emb_name, 44 | initializer=fluid.initializer.Normal(scale=0.1))) 45 | 46 | response_emb = fluid.input.embedding( 47 | input=response_wordseq, 48 | size=[args.vocab_size, args.emb_size], 49 | is_sparse=True, 50 | param_attr=fluid.ParamAttr( 51 | name=word_emb_name, 52 | initializer=fluid.initializer.Normal(scale=0.1))) 53 | 54 | #fc to fit dynamic LSTM 55 | context_fc = fluid.layers.fc( 56 | input=context_emb, 57 | size=args.hidden_size * 4, 58 | param_attr=fluid.ParamAttr(name='fc_weight'), 59 | bias_attr=fluid.ParamAttr(name='fc_bias')) 60 | 61 | response_fc = fluid.layers.fc( 62 | input=response_emb, 63 | size=args.hidden_size * 4, 64 | param_attr=fluid.ParamAttr(name='fc_weight'), 65 | bias_attr=fluid.ParamAttr(name='fc_bias')) 66 | 67 | #LSTM 68 | context_rep, _ = fluid.layers.dynamic_lstm( 69 | input=context_fc, 70 | size=args.hidden_size * 4, 71 | param_attr=fluid.ParamAttr(name=lstm_W_name), 72 | bias_attr=fluid.ParamAttr(name=lstm_bias_name)) 73 | context_rep = fluid.layers.sequence_last_step(context_rep) 74 | 75 | response_rep, _ = fluid.layers.dynamic_lstm( 76 | input=response_fc, 77 | size=args.hidden_size * 4, 78 | param_attr=fluid.ParamAttr(name=lstm_W_name), 79 | 
bias_attr=fluid.ParamAttr(name=lstm_bias_name)) 80 | response_rep = fluid.layers.sequence_last_step(input=response_rep) 81 | 82 | logits = fluid.layers.bilinear_tensor_product( 83 | context_rep, response_rep, size=1) 84 | 85 | if args.loss_type == 'CLS': 86 | label = fluid.layers.cast(x=label, dtype='float32') 87 | loss = fluid.layers.sigmoid_cross_entropy_with_logits(logits, label) 88 | loss = fluid.layers.reduce_mean( 89 | fluid.layers.clip( 90 | loss, min=-clip_value, max=clip_value)) 91 | elif args.loss_type == 'L2': 92 | norm_score = 2 * fluid.layers.sigmoid(logits) 93 | label = fluid.layers.cast(x=label, dtype='float32') 94 | loss = fluid.layers.square_error_cost(norm_score, label) / 4 95 | loss = fluid.layers.reduce_mean(loss) 96 | else: 97 | raise ValueError 98 | 99 | if is_training: 100 | return loss 101 | else: 102 | return logits 103 | 104 | 105 | def set_word_embedding(word_emb, place, word_emb_name="shared_word_emb"): 106 | """ 107 | Set word embedding 108 | """ 109 | word_emb_param = fluid.global_scope().find_var( 110 | word_emb_name).get_tensor() 111 | word_emb_param.set(word_emb, place) 112 | 113 | -------------------------------------------------------------------------------- /ADE/predict.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """predict auto dialogue evaluation task""" 16 | import io 17 | import os 18 | import sys 19 | import six 20 | import time 21 | import numpy as np 22 | 23 | import paddle 24 | import paddle.fluid as fluid 25 | 26 | import ade.reader as reader 27 | from ade_net import create_net 28 | 29 | from ade.utils.configure import PDConfig 30 | from ade.utils.input_field import InputField 31 | from ade.utils.model_check import check_cuda 32 | import ade.utils.save_load_io as save_load_io 33 | 34 | 35 | def do_predict(args): 36 | """ 37 | predict function 38 | """ 39 | test_prog = fluid.default_main_program() 40 | startup_prog = fluid.default_startup_program() 41 | 42 | with fluid.program_guard(test_prog, startup_prog): 43 | test_prog.random_seed = args.random_seed 44 | startup_prog.random_seed = args.random_seed 45 | 46 | with fluid.unique_name.guard(): 47 | 48 | context_wordseq = fluid.data( 49 | name='context_wordseq', shape=[-1, 1], dtype='int64', lod_level=1) 50 | response_wordseq = fluid.data( 51 | name='response_wordseq', shape=[-1, 1], dtype='int64', lod_level=1) 52 | labels = fluid.data( 53 | name='labels', shape=[-1, 1], dtype='int64') 54 | 55 | input_inst = [context_wordseq, response_wordseq, labels] 56 | input_field = InputField(input_inst) 57 | data_reader = fluid.io.PyReader(feed_list=input_inst, 58 | capacity=4, iterable=False) 59 | 60 | logits = create_net( 61 | is_training=False, 62 | model_input=input_field, 63 | args=args 64 | ) 65 | logits.persistable = True 66 | 67 | fetch_list = [logits.name] 68 | #for_test is True if change the is_test attribute of operators to True 69 | test_prog = test_prog.clone(for_test=True) 70 | if args.use_cuda: 71 | place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0'))) 72 | else: 73 | place = fluid.CPUPlace() 74 | 75 | exe = fluid.Executor(place) 76 | exe.run(startup_prog) 77 | 78 | assert 
(args.init_from_params) or (args.init_from_pretrain_model) 79 | if args.init_from_params: 80 | save_load_io.init_from_params(args, exe, test_prog) 81 | if args.init_from_pretrain_model: 82 | save_load_io.init_from_pretrain_model(args, exe, test_prog) 83 | 84 | compiled_test_prog = fluid.CompiledProgram(test_prog) 85 | 86 | processor = reader.DataProcessor( 87 | data_path=args.predict_file, 88 | max_seq_length=args.max_seq_len, 89 | batch_size=args.batch_size) 90 | 91 | batch_generator = processor.data_generator( 92 | place=place, 93 | phase="test", 94 | shuffle=False, 95 | sample_pro=1) 96 | num_test_examples = processor.get_num_examples(phase='test') 97 | 98 | data_reader.decorate_batch_generator(batch_generator) 99 | data_reader.start() 100 | 101 | scores = [] 102 | while True: 103 | try: 104 | results = exe.run(compiled_test_prog, fetch_list=fetch_list) 105 | scores.extend(results[0]) 106 | except fluid.core.EOFException: 107 | data_reader.reset() 108 | break 109 | 110 | scores = scores[: num_test_examples] 111 | print("Write the predicted results into the output_prediction_file") 112 | fw = io.open(args.output_prediction_file, 'w', encoding="utf8") 113 | for index, score in enumerate(scores): 114 | fw.write("%s\t%s\n" % (index, score)) 115 | print("finish........................................") 116 | 117 | 118 | if __name__ == "__main__": 119 | 120 | args = PDConfig(yaml_file="./data/config/ade.yaml") 121 | args.build() 122 | args.Print() 123 | 124 | check_cuda(args.use_cuda) 125 | 126 | do_predict(args) 127 | -------------------------------------------------------------------------------- /DGU/inference_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """save inference model""" 15 | 16 | import os 17 | import sys 18 | import argparse 19 | import collections 20 | import numpy as np 21 | 22 | import paddle 23 | import paddle.fluid as fluid 24 | 25 | from dgu.utils.configure import PDConfig 26 | from dgu.utils.input_field import InputField 27 | from dgu.utils.model_check import check_cuda 28 | import dgu.utils.save_load_io as save_load_io 29 | 30 | import dgu.reader as reader 31 | from dgu_net import create_net 32 | import dgu.define_paradigm as define_paradigm 33 | 34 | 35 | def do_save_inference_model(args): 36 | """save inference model function""" 37 | 38 | task_name = args.task_name.lower() 39 | paradigm_inst = define_paradigm.Paradigm(task_name) 40 | 41 | processors = { 42 | 'udc': reader.UDCProcessor, 43 | 'swda': reader.SWDAProcessor, 44 | 'mrda': reader.MRDAProcessor, 45 | 'atis_slot': reader.ATISSlotProcessor, 46 | 'atis_intent': reader.ATISIntentProcessor, 47 | 'dstc2': reader.DSTC2Processor, 48 | } 49 | 50 | test_prog = fluid.default_main_program() 51 | startup_prog = fluid.default_startup_program() 52 | 53 | with fluid.program_guard(test_prog, startup_prog): 54 | test_prog.random_seed = args.random_seed 55 | startup_prog.random_seed = args.random_seed 56 | 57 | with fluid.unique_name.guard(): 58 | 59 | # define inputs of the network 60 | num_labels = len(processors[task_name].get_labels()) 61 | 62 | src_ids = fluid.data( 63 | name='src_ids', shape=[-1, args.max_seq_len], dtype='int64') 64 | pos_ids = fluid.data( 65 | name='pos_ids', shape=[-1, 
args.max_seq_len], dtype='int64') 66 | sent_ids = fluid.data( 67 | name='sent_ids', shape=[-1, args.max_seq_len], dtype='int64') 68 | input_mask = fluid.data( 69 | name='input_mask', shape=[-1, args.max_seq_len], dtype='float32') 70 | if args.task_name == 'atis_slot': 71 | labels = fluid.data( 72 | name='labels', shape=[-1, args.max_seq_len], dtype='int64') 73 | elif args.task_name in ['dstc2', 'dstc2_asr', 'multi-woz']: 74 | labels = fluid.data( 75 | name='labels', shape=[-1, num_labels], dtype='int64') 76 | else: 77 | labels = fluid.data( 78 | name='labels', shape=[-1, 1], dtype='int64') 79 | 80 | input_inst = [src_ids, pos_ids, sent_ids, input_mask, labels] 81 | input_field = InputField(input_inst) 82 | 83 | results = create_net( 84 | is_training=False, 85 | model_input=input_field, 86 | num_labels=num_labels, 87 | paradigm_inst=paradigm_inst, 88 | args=args) 89 | probs = results.get("probs", None) 90 | 91 | if args.use_cuda: 92 | place = fluid.CUDAPlace(0) 93 | else: 94 | place = fluid.CPUPlace() 95 | 96 | exe = fluid.Executor(place) 97 | exe.run(startup_prog) 98 | 99 | assert (args.init_from_params) or (args.init_from_pretrain_model) 100 | 101 | if args.init_from_params: 102 | save_load_io.init_from_params(args, exe, test_prog) 103 | elif args.init_from_pretrain_model: 104 | save_load_io.init_from_pretrain_model(args, exe, test_prog) 105 | 106 | # saving inference model 107 | fluid.io.save_inference_model( 108 | args.inference_model_dir, 109 | feeded_var_names=[ 110 | input_field.src_ids.name, 111 | input_field.pos_ids.name, 112 | input_field.sent_ids.name, 113 | input_field.input_mask.name 114 | ], 115 | target_vars=[ 116 | probs 117 | ], 118 | executor=exe, 119 | main_program=test_prog, 120 | model_filename="model.pdmodel", 121 | params_filename="params.pdparams") 122 | 123 | print("save inference model at %s" % (args.inference_model_dir)) 124 | 125 | 126 | if __name__ == "__main__": 127 | 128 | args = PDConfig(yaml_file="./data/config/dgu.yaml") 129 | 
class DataProcessor(object):
    """Read tab-separated dialogue-evaluation examples and batch them.

    Every usable line of the data file has three tab-separated fields:
    ``context``, ``response`` and ``label``.  Context and response are
    whitespace-separated integer token ids; label is a single integer.
    """

    def __init__(self, data_path, max_seq_length, batch_size):
        """Remember the data file, the per-sequence token cap and batch size."""
        self.data_file = data_path
        self.max_seq_len = max_seq_length
        self.batch_size = batch_size
        self.num_examples = {'train': -1, 'dev': -1, 'test': -1}

    def get_examples(self):
        """Return all lines of the data file, each stripped of outer whitespace.

        A progress message is printed every 100 lines.
        """
        examples = []
        # The context manager closes the handle even if decoding fails midway.
        with io.open(self.data_file, 'r', encoding="utf8") as fr:
            for index, line in enumerate(fr):
                if index != 0 and index % 100 == 0:
                    print("processing data: %d" % index)
                examples.append(line.strip())
        return examples

    def get_num_examples(self, phase):
        """Count the lines of the data file and record the count for ``phase``.

        Args:
            phase: one of 'train', 'dev' or 'test'.

        Returns:
            The number of lines in the data file.

        Raises:
            ValueError: if ``phase`` is not a known phase name.
        """
        if phase not in ('train', 'dev', 'test'):
            raise ValueError(
                "Unknown phase, which should be in ['train', 'dev', 'test'].")
        # Stream the file instead of materialising every line just to count.
        with io.open(self.data_file, 'r', encoding="utf8") as fr:
            count = sum(1 for _ in fr)
        self.num_examples[phase] = count
        return self.num_examples[phase]

    def data_generator(self,
                       place,
                       phase="train",
                       shuffle=True,
                       sample_pro=1):
        """
        Generate data for train, dev or test.

        Args:
            place: device handle passed to ``LoDTensor.set``.
            phase: string. The phase for which to generate data (not read by
                the code in this method).
            shuffle: bool. Whether to shuffle examples.
            sample_pro: sample data ratio; when below 1 each example is kept
                with this probability.

        Returns:
            A zero-argument callable yielding
            ``[context_lod_tensor, response_lod_tensor, label_array]`` batches.
        """
        examples = self.get_examples()
        if shuffle:
            np.random.shuffle(examples)

        def batch_reader():
            """read batch data"""
            batch = []
            for example in examples:
                # Random down-sampling of the example stream.
                if sample_pro < 1 and random.random() > sample_pro:
                    continue
                tokens = example.strip().split('\t')

                if len(tokens) != 3:
                    print("data format error: %s" % example.strip())
                    print("please input data: context \t response \t label")
                    continue

                context = [int(x) for x in tokens[0].split()[:self.max_seq_len]]
                response = [int(x) for x in tokens[1].split()[:self.max_seq_len]]
                label = [int(tokens[2])]
                instance = (context, response, label)

                if len(batch) < self.batch_size:
                    batch.append(instance)
                else:
                    # The batch is full: emit it and start the next one with
                    # the instance that did not fit.
                    if len(batch) == self.batch_size:
                        yield batch
                    batch = [instance]

            # Emit the last (possibly partial) batch.
            if len(batch) > 0:
                yield batch

        def create_lodtensor(data_ids, place):
            """create LodTensor for input ids"""
            # Level-of-detail offsets: cumulative sequence lengths from 0.
            cur_len = 0
            lod = [cur_len]
            for ids in data_ids:
                cur_len += len(ids)
                lod.append(cur_len)
            flattened_data = np.concatenate(data_ids, axis=0).astype("int64")
            flattened_data = flattened_data.reshape([len(flattened_data), 1])
            res = fluid.LoDTensor()
            res.set(flattened_data, place)
            res.set_lod([lod])
            return res

        def wrapper():
            """yield batch data to network"""
            for batch_data in batch_reader():
                context_ids = [item[0] for item in batch_data]
                response_ids = [item[1] for item in batch_data]
                label_ids = [item[2] for item in batch_data]
                context_res = create_lodtensor(context_ids, place)
                response_res = create_lodtensor(response_ids, place)
                label_ids = np.array(label_ids).astype("int64").reshape([-1, 1])
                yield [context_res, response_res, label_ids]

        return wrapper
# Make sure the directory for saved checkpoints exists.
if [ ! -d "${SAVE_MODEL_PATH}" ]; then
    mkdir -p "${SAVE_MODEL_PATH}"
fi

# Per-task hyper-parameter configuration.
if [ "${TASK_NAME}" = "udc" ]
then
    save_steps=1000
    max_seq_len=210
    print_steps=1000
    batch_size=6720
    in_tokens=true
    epoch=2
    learning_rate=2e-5
elif [ "${TASK_NAME}" = "swda" ]
then
    save_steps=500
    max_seq_len=128
    print_steps=200
    batch_size=6720
    in_tokens=true
    epoch=3
    learning_rate=2e-5
elif [ "${TASK_NAME}" = "mrda" ]
then
    save_steps=500
    max_seq_len=128
    print_steps=200
    batch_size=4096
    in_tokens=true
    epoch=7
    learning_rate=2e-5
elif [ "${TASK_NAME}" = "atis_intent" ]
then
    save_steps=100
    max_seq_len=128
    print_steps=10
    batch_size=4096
    in_tokens=true
    epoch=20
    learning_rate=2e-5
    INPUT_PATH="./data/input/data/atis/${TASK_NAME}"
elif [ "${TASK_NAME}" = "atis_slot" ]
then
    save_steps=100
    max_seq_len=128
    print_steps=10
    batch_size=32
    in_tokens=False
    epoch=50
    learning_rate=2e-5
    INPUT_PATH="./data/input/data/atis/${TASK_NAME}"
elif [ "${TASK_NAME}" = "dstc2" ]
then
    save_steps=400
    print_steps=20
    batch_size=8192
    in_tokens=true
    epoch=40
    learning_rate=5e-5
    INPUT_PATH="./data/input/data/dstc2/${TASK_NAME}"
    # dstc2 uses a shorter sequence cap for training than for the other stages.
    if [ "${TASK_TYPE}" = "train" ]
    then
        max_seq_len=256
    else
        max_seq_len=512
    fi
else
    echo "not support ${TASK_NAME} dataset.."
    exit 255
fi

# Fine-tune from the pretrained BERT parameters. $1: use_cuda (true|false).
function train()
{
    "$PYTHON_PATH" -u main.py \
        --task_name="${TASK_NAME}" \
        --use_cuda="$1" \
        --do_train=true \
        --in_tokens="${in_tokens}" \
        --epoch="${epoch}" \
        --batch_size="${batch_size}" \
        --do_lower_case=true \
        --data_dir="${INPUT_PATH}" \
        --bert_config_path="${BERT_BASE_PATH}/bert_config.json" \
        --vocab_path="${BERT_BASE_PATH}/vocab.txt" \
        --init_from_pretrain_model="${BERT_BASE_PATH}/params" \
        --save_model_path="${SAVE_MODEL_PATH}" \
        --save_param="params" \
        --save_steps="${save_steps}" \
        --learning_rate="${learning_rate}" \
        --weight_decay=0.01 \
        --max_seq_len="${max_seq_len}" \
        --print_steps="${print_steps}";
}

# Write predictions with the trained parameters. $1: use_cuda (true|false).
function predict()
{
    "$PYTHON_PATH" -u main.py \
        --task_name="${TASK_NAME}" \
        --use_cuda="$1" \
        --do_predict=true \
        --in_tokens="${in_tokens}" \
        --batch_size="${batch_size}" \
        --data_dir="${INPUT_PATH}" \
        --do_lower_case=true \
        --init_from_params="${TRAIN_MODEL_PATH}/${TASK_NAME}/params" \
        --bert_config_path="${BERT_BASE_PATH}/bert_config.json" \
        --vocab_path="${BERT_BASE_PATH}/vocab.txt" \
        --output_prediction_file="${OUTPUT_PATH}/pred_${TASK_NAME}" \
        --max_seq_len="${max_seq_len}";
}

# Score the prediction file against test.txt. $1: use_cuda (true|false).
function evaluate()
{
    "$PYTHON_PATH" -u main.py \
        --task_name="${TASK_NAME}" \
        --use_cuda="$1" \
        --do_eval=True \
        --evaluation_file="${INPUT_PATH}/test.txt" \
        --output_prediction_file="${OUTPUT_PATH}/pred_${TASK_NAME}";
}

# Export the inference model from the trained parameters. $1: use_cuda.
function save_inference()
{
    "$PYTHON_PATH" -u main.py \
        --task_name="${TASK_NAME}" \
        --use_cuda="$1" \
        --init_from_params="${TRAIN_MODEL_PATH}/${TASK_NAME}/params" \
        --do_save_inference_model=True \
        --bert_config_path="${BERT_BASE_PATH}/bert_config.json" \
        --inference_model_dir="${INFERENCE_MODEL}/${TASK_NAME}";
}

# Dispatch on the requested stage.
if [ "${TASK_TYPE}" = "train" ]
then
    echo "train $TASK_NAME start..........";
    train "$use_cuda";
    echo "train $TASK_NAME finish..........";
elif [ "${TASK_TYPE}" = "predict" ]
then
    echo "predict $TASK_NAME start..........";
    predict "$use_cuda";
    echo "predict $TASK_NAME finish..........";
elif [ "${TASK_TYPE}" = "evaluate" ]
then
    # Evaluation runs with no visible GPU and use_cuda=false.
    export CUDA_VISIBLE_DEVICES=
    echo "evaluate $TASK_NAME start..........";
    evaluate false;
    echo "evaluate $TASK_NAME finish..........";
elif [ "${TASK_TYPE}" = "inference" ]
then
    echo "save $TASK_NAME inference model start..........";
    save_inference "$use_cuda";
    echo "save $TASK_NAME inference model finish..........";
elif [ "${TASK_TYPE}" = "all" ]
then
    echo "Execute train、predict、evaluate and save inference model in sequence...."
    train "$use_cuda";
    predict "$use_cuda";
    evaluate false;
    save_inference "$use_cuda";
    echo "done";
else
    echo "Parameter $TASK_TYPE is not supported, you can input parameter in [train|predict|evaluate|inference|all]"
    exit 255;
fi
def linear_warmup_decay(learning_rate, warmup_steps, num_train_steps):
    """ Applies linear warmup of learning rate from 0 and decay to 0.

    Args:
        learning_rate: peak learning rate.
        warmup_steps: number of steps over which the rate grows linearly.
        num_train_steps: decay horizon passed to ``polynomial_decay``.

    Returns:
        The persistable global variable ``scheduled_learning_rate`` that the
        schedule assigns on every step.
    """
    with fluid.default_main_program()._lr_schedule_guard():
        lr = fluid.layers.tensor.create_global_var(
            shape=[1],
            value=0.0,
            dtype='float32',
            persistable=True,
            name="scheduled_learning_rate")

        global_step = fluid.layers.learning_rate_scheduler._decay_step_counter()

        with fluid.layers.control_flow.Switch() as switch:
            with switch.case(global_step < warmup_steps):
                warmup_lr = learning_rate * (global_step / warmup_steps)
                fluid.layers.tensor.assign(warmup_lr, lr)
            with switch.default():
                decayed_lr = fluid.layers.learning_rate_scheduler.polynomial_decay(
                    learning_rate=learning_rate,
                    decay_steps=num_train_steps,
                    end_learning_rate=0.0,
                    power=1.0,
                    cycle=False)
                fluid.layers.tensor.assign(decayed_lr, lr)

        return lr


def exclude_from_weight_decay(name, master_suffix=None):
    """Return True when the parameter called ``name`` must not be decayed.

    Layer-norm parameters and biases are excluded.

    Args:
        name: parameter name.
        master_suffix: optional suffix (e.g. ".master") removed from ``name``
            before matching.  It is removed as a whole suffix; ``str.rstrip``
            would instead strip a *set of characters* and mangle names such
            as "x_bias.master" into "x_bi".
    """
    if master_suffix and name.endswith(master_suffix):
        name = name[:-len(master_suffix)]
    if name.find("layer_norm") > -1:
        return True
    return name.endswith(("_bias", "_b", ".b_0"))


def optimization(loss,
                 warmup_steps,
                 num_train_steps,
                 learning_rate,
                 train_program,
                 startup_prog,
                 weight_decay,
                 scheduler='linear_warmup_decay',
                 use_fp16=False,
                 loss_scaling=1.0):
    """Build the Adam update with LR scheduling, clipping and weight decay.

    Args:
        loss: loss variable to minimise.
        warmup_steps: warmup length; when not positive a constant learning
            rate is used and no scheduler is built.
        num_train_steps: decay horizon for 'linear_warmup_decay'.
        learning_rate: base/peak learning rate.
        train_program: program whose parameters are updated.
        startup_prog: startup program (used for fp16 master parameters).
        weight_decay: decoupled weight-decay coefficient; skipped when <= 0.
        scheduler: 'noam_decay' or 'linear_warmup_decay'.
        use_fp16: whether to update through fp32 master parameters.
        loss_scaling: loss scale used with fp16.

    Returns:
        The scheduled learning rate (a variable when a scheduler is built,
        otherwise the plain ``learning_rate`` value).

    Raises:
        ValueError: if ``scheduler`` is not a known scheduler name.
    """
    if warmup_steps > 0:
        if scheduler == 'noam_decay':
            scheduled_lr = fluid.layers.learning_rate_scheduler\
                .noam_decay(1/(warmup_steps *(learning_rate ** 2)),
                            warmup_steps)
        elif scheduler == 'linear_warmup_decay':
            scheduled_lr = linear_warmup_decay(learning_rate, warmup_steps,
                                               num_train_steps)
        else:
            raise ValueError("Unkown learning rate scheduler, should be "
                             "'noam_decay' or 'linear_warmup_decay'")
        optimizer = fluid.optimizer.Adam(learning_rate=scheduled_lr)
    else:
        optimizer = fluid.optimizer.Adam(learning_rate=learning_rate)
        scheduled_lr = learning_rate

    clip_norm_thres = 1.0
    # When using mixed precision training, scale the gradient clip threshold
    # by loss_scaling
    if use_fp16 and loss_scaling > 1.0:
        clip_norm_thres *= loss_scaling
    fluid.clip.set_gradient_clip(
        clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=clip_norm_thres))

    # Snapshot of each parameter taken before the Adam update; the decay term
    # is computed from these pre-update values.
    param_list = dict()

    if use_fp16:
        param_grads = optimizer.backward(loss)
        master_param_grads = create_master_params_grads(
            param_grads, train_program, startup_prog, loss_scaling)

        for param, _ in master_param_grads:
            param_list[param.name] = param * 1.0
            param_list[param.name].stop_gradient = True

        optimizer.apply_gradients(master_param_grads)

        if weight_decay > 0:
            for param, grad in master_param_grads:
                # Match on the underlying parameter name, without the
                # ".master" suffix carried by the master copy.
                if exclude_from_weight_decay(
                        param.name, master_suffix=".master"):
                    continue
                with param.block.program._optimized_guard(
                    [param, grad]), fluid.framework.name_scope("weight_decay"):
                    updated_param = param - param_list[
                        param.name] * weight_decay * scheduled_lr
                    fluid.layers.assign(output=param, input=updated_param)

        master_param_to_train_param(master_param_grads, param_grads,
                                    train_program)

    else:
        for param in train_program.global_block().all_parameters():
            param_list[param.name] = param * 1.0
            param_list[param.name].stop_gradient = True

        _, param_grads = optimizer.minimize(loss)

        if weight_decay > 0:
            for param, grad in param_grads:
                if exclude_from_weight_decay(param.name):
                    continue
                with param.block.program._optimized_guard(
                    [param, grad]), fluid.framework.name_scope("weight_decay"):
                    updated_param = param - param_list[
                        param.name] * weight_decay * scheduled_lr
                    fluid.layers.assign(output=param, input=updated_param)

    return scheduled_lr
class MRDA(object):
    """
    dialogue act dataset mrda data process
    """
    def __init__(self):
        """
        init instance
        """
        self.tag_id = 0
        self.map_tag_dict = dict()
        self.out_dir = "../../data/input/data/mrda"
        self.data_list = "./conf/mrda.conf"
        self.map_tag = "../../data/input/data/mrda/map_tag_id.txt"
        self.voc_map_tag = "../../data/input/data/mrda/source_data/icsi_mrda+hs_corpus_050512/classmaps/map_01b_expanded_w_split"
        self.src_dir = "../../data/input/data/mrda/source_data/icsi_mrda+hs_corpus_050512/data"
        self._load_file()
        self.tag_dict = commonlib.load_voc(self.voc_map_tag)

    def _load_file(self):
        """
        load dataset filename

        Fills ``self.dadb_dict`` and ``self.trans_dict`` with
        ``keyword -> path`` for source files whose name contains 'dadb' or
        'trans'; the keyword is the file name up to its first '.'.
        """
        self.dadb_dict = {}
        self.trans_dict = {}
        self.data_dict = commonlib.load_dict(self.data_list)
        file_list, file_path = commonlib.get_file_list(self.src_dir)
        for name, path in zip(file_list, file_path):
            keyword = name.split('.')[0]
            if 'dadb' in name:
                self.dadb_dict[keyword] = path
            if 'trans' in name:
                self.trans_dict[keyword] = path

    def load_dadb(self, data_type):
        """
        load dadb dataset

        Returns:
            ``(dadb_dict, conv_id_list)``: a mapping from utterance id to
            ``(error_code, da_ori_tag, da_tag)`` and the ids in file order.
            Only rows with exactly 14 fields contribute to either value.
        """
        dadb_dict = {}
        conv_id_list = []
        for dadb_key in self.data_dict[data_type]:
            dadb_file = self.dadb_dict[dadb_key]
            with io.open(dadb_file, 'r', encoding="utf8") as fr:
                for elems in csv.reader(fr, delimiter=','):
                    # Validate before indexing; an id is recorded only when
                    # its row is usable, so every listed id has an entry.
                    if len(elems) != 14:
                        continue
                    conv_id = elems[2]
                    error_code = elems[3]
                    da_tag = elems[-9]
                    da_ori_tag = elems[-6]
                    conv_id_list.append(conv_id)
                    dadb_dict[conv_id] = (error_code, da_ori_tag, da_tag)
        return dadb_dict, conv_id_list

    def load_trans(self, data_type):
        """load trans data

        Returns:
            Mapping from utterance id to ``(text, text_process)``; rows that
            do not have exactly 3 fields are ignored.
        """
        trans_dict = {}
        for trans_key in self.data_dict[data_type]:
            trans_file = self.trans_dict[trans_key]
            with io.open(trans_file, 'r', encoding="utf8") as fr:
                for elems in csv.reader(fr, delimiter=','):
                    if len(elems) != 3:
                        continue
                    conv_id, text, text_process = elems
                    trans_dict[conv_id] = (text, text_process)
        return trans_dict

    def _parser_dataset(self, data_type):
        """
        parser train dev test dataset

        Writes ``<out_dir>/<data_type>.txt`` with one line per kept
        utterance: ``conv_no \t tag_id \t caller \t text``.  Utterances whose
        tag is unknown or maps to "Z" are dropped.

        Raises:
            KeyError: if a listed utterance has no transcript entry.
        """
        out_filename = "%s/%s.txt" % (self.out_dir, data_type)
        dadb_dict, conv_id_list = self.load_dadb(data_type)
        trans_dict = self.load_trans(data_type)
        # Closing the file guarantees the output is flushed to disk.
        with io.open(out_filename, 'w', encoding="utf8") as fw:
            for elem in conv_id_list:
                v_dadb = dadb_dict[elem]
                v_trans = trans_dict[elem]
                da_tag = v_dadb[2]
                if da_tag not in self.tag_dict:
                    continue
                tag = self.tag_dict[da_tag]
                if tag == "Z":
                    continue
                # Tag ids are assigned in order of first appearance.
                if tag not in self.map_tag_dict:
                    self.map_tag_dict[tag] = self.tag_id
                    self.tag_id += 1
                # The part of the id before '_' is split on '-': first piece
                # is the conversation, last piece the caller.
                prefix = elem.split('_')[0]
                caller = prefix.split('-')[-1]
                conv_no = prefix.split('-')[0]
                out = "%s\t%s\t%s\t%s" % (conv_no, self.map_tag_dict[tag], caller, v_trans[0])
                fw.write(u"%s\n" % out)

    def get_train_dataset(self):
        """
        parser train dataset and print train.txt
        """
        self._parser_dataset("train")

    def get_dev_dataset(self):
        """
        parser dev dataset and print dev.txt
        """
        self._parser_dataset("dev")

    def get_test_dataset(self):
        """
        parser test dataset and print test.txt
        """
        self._parser_dataset("test")

    def get_labels(self):
        """
        get tag and map ids file
        """
        with io.open(self.map_tag, 'w', encoding="utf8") as fw:
            for elem in self.map_tag_dict:
                fw.write(u"%s\t%s\n" % (elem, self.map_tag_dict[elem]))

    def main(self):
        """
        run data process
        """
        self.get_train_dataset()
        self.get_dev_dataset()
        self.get_test_dataset()
        self.get_labels()


if __name__ == "__main__":
    mrda_inst = MRDA()
    mrda_inst.main()
def build_one_batch(data, batch_index, conf, turn_cut_type='tail', term_cut_type='tail'):
    """Assemble the ``batch_index``-th batch of normalized samples.

    Args:
        data: dict of samples, handed to ``produce_one_sample``.
        batch_index: zero-based batch number; sample ``i`` of the batch is
            taken from position ``batch_index * batch_size + i``.
        conf: dict providing 'batch_size', '_EOS_', 'max_turn_num' and
            'max_turn_len'.
        turn_cut_type: 'head' or 'tail', how over-long turn lists are cut.
        term_cut_type: 'head' or 'tail', how over-long token lists are cut.

    Returns:
        Tuple ``(turns, tt_turns_len, every_turn_len, response,
        response_len, label)`` of per-sample lists.
    """
    _turns = []
    _tt_turns_len = []
    _every_turn_len = []

    _response = []
    _response_len = []

    _label = []

    batch_size = conf['batch_size']
    for i in range(batch_size):
        index = batch_index * batch_size + i
        y, nor_turns_nor_c, nor_r, turn_len, term_len, r_len = produce_one_sample(
            data, index, conf['_EOS_'], conf['max_turn_num'],
            conf['max_turn_len'], turn_cut_type, term_cut_type)

        _label.append(y)
        _turns.append(nor_turns_nor_c)
        _response.append(nor_r)
        _every_turn_len.append(term_len)
        _tt_turns_len.append(turn_len)
        _response_len.append(r_len)

    return _turns, _tt_turns_len, _every_turn_len, _response, _response_len, _label


def build_one_batch_dict(data, batch_index, conf, turn_cut_type='tail', term_cut_type='tail'):
    """Same as ``build_one_batch`` but returns the six lists keyed by name."""
    _turns, _tt_turns_len, _every_turn_len, _response, _response_len, _label = build_one_batch(
        data, batch_index, conf, turn_cut_type, term_cut_type)
    ans = {'turns': _turns,
           'tt_turns_len': _tt_turns_len,
           'every_turn_len': _every_turn_len,
           'response': _response,
           'response_len': _response_len,
           'label': _label}
    return ans


def build_batches(data, conf, turn_cut_type='tail', term_cut_type='tail'):
    """Split ``data`` into full batches; a trailing partial batch is dropped.

    Args:
        data: dict of samples; ``len(data['y'])`` gives the sample count.
        conf: dict providing 'batch_size' plus the keys ``build_one_batch``
            reads.
        turn_cut_type: 'head' or 'tail', forwarded to ``build_one_batch``.
        term_cut_type: 'head' or 'tail', forwarded to ``build_one_batch``.

    Returns:
        Dict with keys 'turns', 'tt_turns_len', 'every_turn_len', 'response',
        'response_len' and 'label', each a list with one entry per batch.
    """
    _turns_batches = []
    _tt_turns_len_batches = []
    _every_turn_len_batches = []

    _response_batches = []
    _response_len_batches = []

    _label_batches = []

    batch_len = len(data['y']) // conf['batch_size']
    for batch_index in range(batch_len):
        # Forward the caller's cut types rather than hard-coding 'tail'.
        _turns, _tt_turns_len, _every_turn_len, _response, _response_len, _label = build_one_batch(
            data, batch_index, conf,
            turn_cut_type=turn_cut_type, term_cut_type=term_cut_type)

        _turns_batches.append(_turns)
        _tt_turns_len_batches.append(_tt_turns_len)
        _every_turn_len_batches.append(_every_turn_len)

        _response_batches.append(_response)
        _response_len_batches.append(_response_len)

        _label_batches.append(_label)

    ans = {
        "turns": _turns_batches, "tt_turns_len": _tt_turns_len_batches, "every_turn_len": _every_turn_len_batches,
        "response": _response_batches, "response_len": _response_len_batches, "label": _label_batches
    }

    return ans


if __name__ == '__main__':
    conf = {
        "batch_size": 256,
        "max_turn_num": 10,
        "max_turn_len": 50,
        "_EOS_": 28270,
    }
    # NOTE: pickle must only be used on trusted, locally produced files.
    with open('../../data/data_small.pkl', 'rb') as data_file:
        train, val, test = pickle.load(data_file)
    print('load data success')

    train_batches = build_batches(train, conf)
    val_batches = build_batches(val, conf)
    test_batches = build_batches(test, conf)
    print('build batches success')

    with open('../../data/batches_small.pkl', 'wb') as batch_file:
        pickle.dump([train_batches, val_batches, test_batches], batch_file)
    print('dump success')
class DSTC2(object):
    """
    dialogue state tracking dstc2 data process
    """
    def __init__(self):
        """
        init instance
        """
        self.map_tag_dict = {}
        self.out_dir = "../../data/input/data/dstc2/dstc2"
        self.out_asr_dir = "../../data/input/data/dstc2/dstc2_asr"
        self.data_list = "./conf/dstc2.conf"
        self.map_tag = "../../data/input/data/dstc2/dstc2/map_tag_id.txt"
        self.src_dir = "../../data/input/data/dstc2/source_data"
        self.onto_json = "../../data/input/data/dstc2/source_data/ontology_dstc2.json"
        self._load_file()
        self._load_ontology()

    def _load_file(self):
        """
        load dataset filename

        Every entry of ``self.data_dict`` is rewritten in place to a path
        below ``self.src_dir``.
        """
        self.data_dict = commonlib.load_dict(self.data_list)
        for entries in self.data_dict.values():
            for i, entry in enumerate(entries):
                entries[i] = os.path.join(self.src_dir, entry)

    def _load_ontology(self):
        """
        load ontology tag

        Id 0 is reserved for 'none'; each informable slot then gets one id
        per ``<slot>_<value>`` followed by one id for ``<slot>_none``.
        """
        tag_id = 1
        self.map_tag_dict['none'] = 0
        with io.open(self.onto_json, 'r', encoding="utf8") as fr:
            ontology = json.load(fr)
        slots_values = ontology['informable']
        for slot in slots_values:
            for value in slots_values[slot]:
                key = "%s_%s" % (slot, value)
                self.map_tag_dict[key] = tag_id
                tag_id += 1
            key = "%s_none" % (slot)
            self.map_tag_dict[key] = tag_id
            tag_id += 1

    def _label_id(self, label):
        """Return the id of ``label``, or of ``<slot>_none`` when unknown."""
        if label in self.map_tag_dict:
            return self.map_tag_dict[label]
        return self.map_tag_dict["%s_none" % label.split('_')[0]]

    def _parser_dataset(self, data_type):
        """
        parser train dev test dataset

        Writes ``<data_type>.txt`` into both output directories.  Each line
        is ``session_id \t machine_utterance \\1 user_utterance \t label_ids``;
        the first file uses the labelled transcription as the user
        utterance, the ASR file uses the first ASR hypothesis.
        """
        if not os.path.exists(self.out_dir):
            os.makedirs(self.out_dir)
        if not os.path.exists(self.out_asr_dir):
            os.makedirs(self.out_asr_dir)
        out_file = os.path.join(self.out_dir, "%s.txt" % data_type)
        out_asr_file = os.path.join(self.out_asr_dir, "%s.txt" % data_type)
        data_list = self.data_dict.get(data_type)
        with io.open(out_file, 'w', encoding="utf8") as fw, \
                io.open(out_asr_file, 'w', encoding="utf8") as fw_asr:
            for fn in data_list:
                log_file = os.path.join(fn, "log.json")
                label_file = os.path.join(fn, "label.json")
                # One pair of files per dialogue: close them promptly.
                with io.open(log_file, 'r', encoding="utf8") as f_log:
                    log_json = json.load(f_log)
                with io.open(label_file, 'r', encoding="utf8") as f_label:
                    label_json = json.load(f_label)
                session_id = log_json['session-id']
                assert len(label_json["turns"]) == len(log_json["turns"])
                for log_turn, label_turn in zip(log_json["turns"], label_json["turns"]):
                    assert log_turn["turn-index"] == label_turn["turn-index"]
                    goal_labels = label_turn["goal-labels"]
                    labels = ["%s_%s" % (slot, goal_labels[slot]) for slot in goal_labels]
                    labels_ids = " ".join(str(self._label_id(label)) for label in labels)
                    mach = log_turn['output']['transcript']
                    user = label_turn['transcription']
                    if not labels_ids.strip():
                        labels_ids = self.map_tag_dict['none']
                    # Keep everything as text: formatting encoded bytes with
                    # %s would write b'...' reprs on Python 3.
                    out = u"%s\t%s\1%s\t%s" % (session_id, mach, user, labels_ids)
                    user_asr = log_turn['input']['live']['asr-hyps'][0]['asr-hyp'].strip()
                    out_asr = u"%s\t%s\1%s\t%s" % (session_id, mach, user_asr, labels_ids)
                    fw.write(u"%s\n" % out)
                    fw_asr.write(u"%s\n" % out_asr)

    def get_train_dataset(self):
        """
        parser train dataset and print train.txt
        """
        self._parser_dataset("train")

    def get_dev_dataset(self):
        """
        parser dev dataset and print dev.txt
        """
        self._parser_dataset("dev")

    def get_test_dataset(self):
        """
        parser test dataset and print test.txt
        """
        self._parser_dataset("test")

    def get_labels(self):
        """
        get tag and map ids file
        """
        with io.open(self.map_tag, 'w', encoding="utf8") as fw:
            for elem in self.map_tag_dict:
                fw.write(u"%s\t%s\n" % (elem, self.map_tag_dict[elem]))

    def main(self):
        """
        run data process
        """
        self.get_train_dataset()
        self.get_dev_dataset()
        self.get_test_dataset()
        self.get_labels()


if __name__ == "__main__":
    dstc_inst = DSTC2()
    dstc_inst.main()
is not None: 16 | np.random.seed(conf['rand_seed']) 17 | 18 | if not os.path.exists(conf['save_path']): 19 | os.makedirs(conf['save_path']) 20 | 21 | # load data 22 | print('starting loading data') 23 | print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))) 24 | train_data, val_data, test_data = pickle.load(open(conf["data_path"], 'rb')) 25 | print('finish loading data') 26 | print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))) 27 | 28 | val_batches = reader.build_batches(val_data, conf) 29 | 30 | print("finish building test batches") 31 | print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))) 32 | 33 | # refine conf 34 | batch_num = int(len(train_data['y']) / conf["batch_size"]) 35 | val_batch_num = len(val_batches["response"]) 36 | 37 | conf["train_steps"] = conf["num_scan_data"] * batch_num 38 | conf["save_step"] = int(max(1, batch_num / 10)) 39 | conf["print_step"] = int(max(1, batch_num / 100)) 40 | 41 | print('configurations: %s' %conf) 42 | 43 | print('model sucess') 44 | print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))) 45 | 46 | _graph = _model.build_graph() 47 | print('build graph sucess') 48 | print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))) 49 | 50 | with tf.Session(graph=_graph) as sess: 51 | _model.init.run(); 52 | if conf["init_model"]: 53 | _model.saver.restore(sess, conf["init_model"]) 54 | print("sucess init %s" %conf["init_model"]) 55 | 56 | average_loss = 0.0 57 | batch_index = 0 58 | step = 0 59 | best_result = [0, 0, 0, 0] 60 | 61 | for step_i in xrange(conf["num_scan_data"]): 62 | #for batch_index in rng.permutation(range(batch_num)): 63 | print('starting shuffle train data') 64 | shuffle_train = reader.unison_shuffle(train_data) 65 | train_batches = reader.build_batches(shuffle_train, conf) 66 | print('finish building train data') 67 | for batch_index in range(batch_num): 68 | 69 | feed = { 70 | _model.turns: train_batches["turns"][batch_index], 71 | 
_model.tt_turns_len: train_batches["tt_turns_len"][batch_index], 72 | _model.every_turn_len: train_batches["every_turn_len"][batch_index], 73 | _model.response: train_batches["response"][batch_index], 74 | _model.response_len: train_batches["response_len"][batch_index], 75 | _model.label: train_batches["label"][batch_index] 76 | } 77 | 78 | batch_index = (batch_index + 1) % batch_num; 79 | 80 | _, curr_loss = sess.run([_model.g_updates, _model.loss], feed_dict = feed) 81 | 82 | 83 | average_loss += curr_loss 84 | 85 | step += 1 86 | 87 | if step % conf["print_step"] == 0 and step > 0: 88 | g_step, lr = sess.run([_model.global_step, _model.learning_rate]) 89 | print('step: %s, lr: %s' %(g_step, lr)) 90 | print("processed: [" + str(step * 1.0 / batch_num) + "] loss: [" + str(average_loss / conf["print_step"]) + "]" ) 91 | average_loss = 0 92 | 93 | 94 | if step % conf["save_step"] == 0 and step > 0: 95 | index = step / conf['save_step'] 96 | score_file_path = conf['save_path'] + 'score.' + str(index) 97 | score_file = open(score_file_path, 'w') 98 | print('save step: %s' %index) 99 | print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))) 100 | 101 | for batch_index in xrange(val_batch_num): 102 | 103 | feed = { 104 | _model.turns: val_batches["turns"][batch_index], 105 | _model.tt_turns_len: val_batches["tt_turns_len"][batch_index], 106 | _model.every_turn_len: val_batches["every_turn_len"][batch_index], 107 | _model.response: val_batches["response"][batch_index], 108 | _model.response_len: val_batches["response_len"][batch_index], 109 | _model.label: val_batches["label"][batch_index] 110 | } 111 | 112 | scores = sess.run(_model.logits, feed_dict = feed) 113 | 114 | for i in xrange(conf["batch_size"]): 115 | score_file.write( 116 | str(scores[i]) + '\t' + 117 | str(val_batches["label"][batch_index][i]) + '\n') 118 | score_file.close() 119 | 120 | #write evaluation result 121 | result = eva.evaluate(score_file_path) 122 | result_file_path = 
conf["save_path"] + "result." + str(index) 123 | with open(result_file_path, 'w') as out_file: 124 | for p_at in result: 125 | out_file.write(str(p_at) + '\n') 126 | print('finish evaluation') 127 | print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))) 128 | 129 | if result[1] + result[2] > best_result[1] + best_result[2]: 130 | best_result = result 131 | _save_path = _model.saver.save(sess, conf["save_path"] + "model.ckpt." + str(step / conf["save_step"])) 132 | print("succ saving model in " + _save_path) 133 | print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))) 134 | 135 | 136 | 137 | -------------------------------------------------------------------------------- /DGU/dgu/scripts/build_atis_dataset.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
class ATIS(object):
    """
    nlu dataset atis data process

    Reads the rasa-style ``<data_type>.json`` files under ``src_dir`` and
    writes tab-separated intent and slot files plus the tag -> id maps.
    """

    def __init__(self):
        """
        init instance
        """
        # ids 0 and 1 are taken by the "PAD" and "O" tags, new slot tags start at 2
        self.slot_id = 2
        self.slot_dict = {"PAD": 0, "O": 1}
        self.intent_id = 0
        self.intent_dict = dict()
        self.src_dir = "../../data/input/data/atis/source_data"
        self.out_slot_dir = "../../data/input/data/atis/atis_slot"
        self.out_intent_dir = "../../data/input/data/atis/atis_intent"
        self.map_tag_slot = "../../data/input/data/atis/atis_slot/map_tag_slot_id.txt"
        self.map_tag_intent = "../../data/input/data/atis/atis_intent/map_tag_intent_id.txt"

    def _load_file(self, data_type):
        """
        load dataset filename

        Creates the two output directories when they are missing, then
        parses ``<src_dir>/<data_type>.json``.

        Args:
            data_type: file stem of the json file to load, e.g. "train".

        Returns:
            list of ``(text, intent, entities)`` tuples, one per entry of
            ``rasa_nlu_data.common_examples``.
        """
        for out_dir in (self.out_slot_dir, self.out_intent_dir):
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)
        json_file = os.path.join(self.src_dir, "%s.json" % data_type)
        # context manager so the source file is closed even if parsing fails
        with io.open(json_file, 'r', encoding="utf8") as load_f:
            json_dict = json.load(load_f)
        examples = json_dict['rasa_nlu_data']['common_examples']
        return [(example.get('text'), example.get('intent'),
                 example.get('entities')) for example in examples]

    def _parser_intent_data(self, examples, data_type):
        """
        parser intent dataset

        Writes one ``<intent_id>\\t<lower-cased text>`` line per example to
        ``<out_intent_dir>/<data_type>.txt`` and then rewrites the
        intent -> id map file. Unseen intents get the next free id.

        Args:
            examples: tuples as returned by ``_load_file``.
            data_type: file stem of the output file, e.g. "train".
        """
        out_filename = "%s/%s.txt" % (self.out_intent_dir, data_type)
        with io.open(out_filename, 'w', encoding="utf8") as fw:
            for example in examples:
                if example[1] not in self.intent_dict:
                    self.intent_dict[example[1]] = self.intent_id
                    self.intent_id += 1
                fw.write(u"%s\t%s\n" % (self.intent_dict[example[1]],
                                        example[0].lower()))

        with io.open(self.map_tag_intent, 'w', encoding="utf8") as fw:
            for tag in self.intent_dict:
                fw.write(u"%s\t%s\n" % (tag, self.intent_dict[tag]))

    def _parser_slot_data(self, examples, data_type):
        """
        parser slot dataset

        Writes one ``<text>\\t<space separated slot ids>`` line per example
        to ``<out_slot_dir>/<data_type>.txt`` and then rewrites the
        slot -> id map file. Entity words are tagged ``B-<entity>`` /
        ``I-<entity>``, every other whitespace-separated word is tagged "O".

        Args:
            examples: tuples as returned by ``_load_file``.
            data_type: file stem of the output file, e.g. "train".
        """
        outside_id = str(self.slot_dict['O'])
        out_filename = "%s/%s.txt" % (self.out_slot_dir, data_type)
        with io.open(out_filename, 'w', encoding="utf8") as fw:
            for example in examples:
                text = example[0]
                entities = example[2]
                if not entities:
                    # NOTE(review): examples without entities are not written
                    # at all. The original computed an all-"O" tag list here
                    # and then discarded it - confirm whether the skip is
                    # intended before changing the generated dataset.
                    continue
                tags = []
                for i, enty in enumerate(entities):
                    start = enty['start']
                    value_num = len(enty['value'].split())
                    tags_slot = []
                    for j in range(value_num):
                        bround_tag = "B" if j == 0 else "I"
                        tag = "%s-%s" % (bround_tag, enty['entity'])
                        if tag not in self.slot_dict:
                            self.slot_dict[tag] = self.slot_id
                            self.slot_id += 1
                        tags_slot.append(str(self.slot_dict[tag]))
                    if i == 0:
                        # words in front of the first entity
                        if start not in [0, 1]:
                            prefix_num = len(text[:start].strip().split())
                            tags.extend([outside_id] * prefix_num)
                    else:
                        # words between the previous entity and this one
                        prefix_num = len(
                            text[entities[i - 1]['end']:start].strip().split())
                        tags.extend([outside_id] * prefix_num)
                    tags.extend(tags_slot)
                # words after the last entity
                if entities[-1]['end'] < len(text):
                    suffix_num = len(text[entities[-1]['end']:].strip().split())
                    tags.extend([outside_id] * suffix_num)
                # write text directly: formatting utf8-encoded bytes into a
                # unicode template emits "b'...'" reprs on Python 3
                fw.write(u"%s\t%s\n" % (text, u" ".join(tags)))

        with io.open(self.map_tag_slot, 'w', encoding="utf8") as fw:
            for slot in self.slot_dict:
                fw.write(u"%s\t%s\n" % (slot, self.slot_dict[slot]))

    def get_train_dataset(self):
        """
        parser train dataset and print train.txt
        """
        train_examples = self._load_file("train")
        self._parser_intent_data(train_examples, "train")
        self._parser_slot_data(train_examples, "train")

    def get_test_dataset(self):
        """
        parser test dataset and print test.txt
        """
        test_examples = self._load_file("test")
        self._parser_intent_data(test_examples, "test")
        self._parser_slot_data(test_examples, "test")

    def main(self):
        """
        run data process
        """
        self.get_train_dataset()
        self.get_test_dataset()
class Paradigm(object):
    """
    Task-specific output heads stacked on top of a transformer encoder.
    """

    def __init__(self, task_name):
        """
        Store the task name that ``paradigm`` dispatches on.
        """
        self.task_name = task_name

    def _pooled_logits(self, transformer_inst, params):
        """
        Head shared by the classification paradigms: dropout over the pooled
        transformer output followed by one fully connected layer of
        ``params['num_labels']`` units.
        """
        pooled = transformer_inst.get_pooled_output()
        pooled = fluid.layers.dropout(
            x=pooled,
            dropout_prob=0.1,
            dropout_implementation="upscale_in_train")
        return fluid.layers.fc(
            input=pooled,
            size=params['num_labels'],
            param_attr=fluid.ParamAttr(
                name="cls_out_w",
                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
            bias_attr=fluid.ParamAttr(
                name="cls_out_b", initializer=fluid.initializer.Constant(0.)))

    def create_cls(self, transformer_inst, params):
        """
        Single-label classification head.

        Returns ``{"probs"}`` when ``params['is_training']`` is false,
        otherwise ``{"loss", "probs", "accuracy", "num_seqs"}``.
        """
        logits = self._pooled_logits(transformer_inst, params)

        if not params['is_training']:
            return {"probs": fluid.layers.softmax(logits)}

        gold = params['labels']
        ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
            logits=logits, label=gold, return_softmax=True)
        mean_loss = fluid.layers.mean(x=ce_loss)
        seq_count = fluid.layers.create_tensor(dtype='int64')
        acc = fluid.layers.accuracy(input=probs, label=gold, total=seq_count)

        return {
            "loss": mean_loss,
            "probs": probs,
            "accuracy": acc,
            "num_seqs": seq_count
        }

    def create_multi_cls(self, transformer_inst, params):
        """
        Multi-label classification head using a sigmoid per label.

        Returns ``{"probs"}`` when ``params['is_training']`` is false,
        otherwise ``{"loss", "probs", "num_seqs"}``.
        """
        logits = self._pooled_logits(transformer_inst, params)

        # the loss ops are created before the is_training check,
        # exactly as the original graph construction does
        float_labels = fluid.layers.cast(params["labels"], dtype='float32')
        per_label_loss = fluid.layers.sigmoid_cross_entropy_with_logits(
            x=logits, label=float_labels)
        summed_loss = fluid.layers.reduce_sum(per_label_loss)
        mean_loss = fluid.layers.mean(x=summed_loss)
        probs = fluid.layers.sigmoid(logits)

        if not params['is_training']:
            return {"probs": probs}

        seq_count = fluid.layers.tensor.fill_constant(
            shape=[1], dtype='int64', value=1)
        return {"loss": mean_loss, "probs": probs, "num_seqs": seq_count}

    def create_sequence_tagging(self, transformer_inst, params):
        """
        Per-token tagging head.

        ``probs`` holds the argmax label id of every token (int32), not a
        probability distribution. Returns ``{"probs"}`` when
        ``params['is_training']`` is false, otherwise
        ``{"loss", "probs", "accuracy", "num_seqs"}``.
        """
        seq_out = transformer_inst.get_sequence_output()
        width = seq_out.shape[-1]
        seq_out = fluid.layers.stack(seq_out, axis=1)
        flat_out = fluid.layers.reshape(seq_out, [-1, width])

        logits = fluid.layers.fc(input=flat_out, size=params['num_labels'])
        best_ids = fluid.layers.argmax(logits, axis=1)
        probs = fluid.layers.cast(best_ids, dtype='int32')

        if not params['is_training']:
            return {"probs": probs}

        seq_count = fluid.layers.tensor.fill_constant(
            shape=[1], dtype='int64', value=1)
        flat_labels = fluid.layers.reshape(params['labels'], [-1])
        flat_labels = fluid.layers.cast(flat_labels, dtype='int32')
        hits = fluid.layers.equal(probs, flat_labels)
        acc = fluid.layers.mean(fluid.layers.cast(hits, dtype='float32'))
        column_labels = fluid.layers.reshape(params['labels'], [-1, 1])
        ce_loss = fluid.layers.softmax_with_cross_entropy(
            logits=logits, label=column_labels)
        mean_loss = fluid.layers.mean(x=ce_loss)

        return {
            "loss": mean_loss,
            "probs": probs,
            "accuracy": acc,
            "num_seqs": seq_count
        }

    def paradigm(self, transformer_inst, params):
        """
        Build the head that belongs to ``self.task_name``.

        Returns the result dict of the selected head, or None when the task
        name is not one of the known tasks.
        """
        task = self.task_name
        if task in ('udc', 'swda', 'mrda', 'atis_intent'):
            return self.create_cls(transformer_inst, params)
        if task == 'atis_slot':
            return self.create_sequence_tagging(transformer_inst, params)
        if task == 'dstc2':
            return self.create_multi_cls(transformer_inst, params)
        return None
def do_predict(args):
    """predict function

    Builds the inference network for ``args.task_name``, restores its
    parameters, runs the "test" phase of the task's data processor and
    writes one ``<index><tab_tok><tags><rt_tok>`` record per prediction to
    ``args.output_prediction_file``.

    Args:
        args: parsed configuration; this function reads task_name,
            random_seed, max_seq_len, use_cuda, init_from_params,
            init_from_pretrain_model, data_dir, vocab_path, do_lower_case,
            in_tokens, batch_size and output_prediction_file.

    Raises:
        AssertionError: if neither ``init_from_params`` nor
            ``init_from_pretrain_model`` is set.
    """

    task_name = args.task_name.lower()
    paradigm_inst = define_paradigm.Paradigm(task_name)
    pred_inst = define_predict_pack.DefinePredict()
    pred_func = getattr(pred_inst, pred_inst.task_map[task_name])

    processors = {
        'udc': reader.UDCProcessor,
        'swda': reader.SWDAProcessor,
        'mrda': reader.MRDAProcessor,
        'atis_slot': reader.ATISSlotProcessor,
        'atis_intent': reader.ATISIntentProcessor,
        'dstc2': reader.DSTC2Processor,
    }

    test_prog = fluid.default_main_program()
    startup_prog = fluid.default_startup_program()

    with fluid.program_guard(test_prog, startup_prog):
        test_prog.random_seed = args.random_seed
        startup_prog.random_seed = args.random_seed

        with fluid.unique_name.guard():

            # define inputs of the network
            num_labels = len(processors[task_name].get_labels())

            src_ids = fluid.data(
                name='src_ids', shape=[-1, args.max_seq_len], dtype='int64')
            pos_ids = fluid.data(
                name='pos_ids', shape=[-1, args.max_seq_len], dtype='int64')
            sent_ids = fluid.data(
                name='sent_ids', shape=[-1, args.max_seq_len], dtype='int64')
            input_mask = fluid.data(
                name='input_mask',
                shape=[-1, args.max_seq_len],
                dtype='float32')
            # branch on the lower-cased name so the label shape always agrees
            # with the processor / paradigm chosen above (the raw
            # args.task_name may differ in case)
            if task_name == 'atis_slot':
                labels = fluid.data(
                    name='labels', shape=[-1, args.max_seq_len], dtype='int64')
            elif task_name in ['dstc2', 'dstc2_asr', 'multi-woz']:
                labels = fluid.data(
                    name='labels', shape=[-1, num_labels], dtype='int64')
            else:
                labels = fluid.data(name='labels', shape=[-1, 1], dtype='int64')

            input_inst = [src_ids, pos_ids, sent_ids, input_mask, labels]
            input_field = InputField(input_inst)
            data_reader = fluid.io.PyReader(
                feed_list=input_inst, capacity=4, iterable=False)

            results = create_net(
                is_training=False,
                model_input=input_field,
                num_labels=num_labels,
                paradigm_inst=paradigm_inst,
                args=args)

            probs = results.get("probs", None)

            probs.persistable = True

            fetch_list = [probs.name]

    #for_test is True if change the is_test attribute of operators to True
    test_prog = test_prog.clone(for_test=True)

    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
    else:
        place = fluid.CPUPlace()

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    assert (args.init_from_params) or (args.init_from_pretrain_model)

    if args.init_from_params:
        save_load_io.init_from_params(args, exe, test_prog)
    if args.init_from_pretrain_model:
        save_load_io.init_from_pretrain_model(args, exe, test_prog)

    compiled_test_prog = fluid.CompiledProgram(test_prog)

    processor = processors[task_name](data_dir=args.data_dir,
                                      vocab_path=args.vocab_path,
                                      max_seq_len=args.max_seq_len,
                                      do_lower_case=args.do_lower_case,
                                      in_tokens=args.in_tokens,
                                      task_name=task_name,
                                      random_seed=args.random_seed)
    batch_generator = processor.data_generator(
        batch_size=args.batch_size, phase='test', shuffle=False)

    data_reader.decorate_batch_generator(batch_generator)
    data_reader.start()

    all_results = []
    while True:
        try:
            batch_results = exe.run(compiled_test_prog, fetch_list=fetch_list)
            all_results.extend(batch_results[0])
        except fluid.core.EOFException:
            # the reader raises EOFException once the test data is exhausted
            data_reader.reset()
            break

    np.set_printoptions(precision=4, suppress=True)
    print("Write the predicted results into the output_prediction_file")

    # context manager so the predictions are flushed and the handle closed
    with io.open(args.output_prediction_file, 'w', encoding="utf8") as fw:
        if task_name not in ['atis_slot']:
            for index, result in enumerate(all_results):
                tags = pred_func(result)
                fw.write("%s%s%s%s" % (index, tab_tok, tags, rt_tok))
        else:
            # the slot predictor takes all results at once plus max_seq_len
            tags = pred_func(all_results, args.max_seq_len)
            for index, tag in enumerate(tags):
                fw.write("%s%s%s%s" % (index, tab_tok, tag, rt_tok))
#!/bin/bash
#
# Entry point for the ADE (auto dialogue evaluation) tasks.
#
# usage: sh run.sh TRAIN_TYPE TASK_TYPE
#   TRAIN_TYPE: matching|seq2seq_naive|seq2seq_att|keywords|human
#   TASK_TYPE:  train|predict|evaluate|inference
#
# "matching" runs the pretraining variant of every task; any other
# TRAIN_TYPE runs the finetuning variant on that label set.

export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1.0

export CUDA_VISIBLE_DEVICES=0

if [ $# -ne 2 ]
then
    echo "please input parameters: TRAIN_TYPE and TASK_TYPE"
    echo "TRAIN_TYPE: [matching|seq2seq_naive|seq2seq_att|keywords|human]"
    echo "TASK_TYPE: [train|predict|evaluate|inference]"
    exit 255
fi

TRAIN_TYPE=$1
TASK_TYPE=$2

candi_train_type=("matching" "seq2seq_naive" "seq2seq_att" "keywords" "human")
candi_task_type=("train" "predict" "evaluate" "inference")

# contains NEEDLE ITEM...: succeeds only on an exact match.
# ([[ "${array[@]}" =~ x ]] also accepts substrings such as "seq2seq".)
function contains()
{
    local needle="$1"
    shift
    local item
    for item in "$@"
    do
        if [ "${item}" = "${needle}" ]
        then
            return 0
        fi
    done
    return 1
}

if ! contains "${TRAIN_TYPE}" "${candi_train_type[@]}"
then
    echo "unknown parameter: ${TRAIN_TYPE}, just support [matching|seq2seq_naive|seq2seq_att|keywords|human]"
    exit 255
fi

if ! contains "${TASK_TYPE}" "${candi_task_type[@]}"
then
    # report the offending TASK_TYPE (the message used to echo TRAIN_TYPE)
    echo "unknown parameter: ${TASK_TYPE}, just support [train|predict|evaluate|inference]"
    exit 255
fi

INPUT_PATH="data/input/data"
OUTPUT_PATH="data/output"
SAVED_MODELS="data/saved_models"
INFERENCE_MODEL="data/inference_models"
PYTHON_PATH="python"

#train pretrain model
if [ ! "$CUDA_VISIBLE_DEVICES" ]
then
    export CPU_NUM=1
    use_cuda=false
else
    use_cuda=true
fi

# prepare_dir PATH: make sure PATH is a directory, replacing a plain file
# of the same name; -p also creates missing parent directories.
function prepare_dir()
{
    if [ -f "${1}" ]
    then
        rm "${1}"
    fi
    mkdir -p "${1}"
}

#training
# pretrain_train USE_CUDA
function pretrain_train()
{
    pretrain_model_path="${SAVED_MODELS}/matching_pretrained"
    prepare_dir "${pretrain_model_path}"

    ${PYTHON_PATH} -u main.py \
        --do_train=true \
        --use_cuda="${1}" \
        --loss_type="CLS" \
        --max_seq_len=50 \
        --save_model_path="${pretrain_model_path}" \
        --save_param="params" \
        --training_file="${INPUT_PATH}/unlabel_data/train.ids" \
        --epoch=20 \
        --print_step=1 \
        --save_step=400 \
        --batch_size=256 \
        --hidden_size=256 \
        --emb_size=256 \
        --vocab_size=484016 \
        --learning_rate=0.001 \
        --sample_pro=0.1
}

# finetuning_train USE_CUDA TRAIN_TYPE
function finetuning_train()
{
    save_model_path="${SAVED_MODELS}/${2}_finetuned"
    prepare_dir "${save_model_path}"

    ${PYTHON_PATH} -u main.py \
        --do_train=true \
        --use_cuda="${1}" \
        --loss_type="L2" \
        --max_seq_len=50 \
        --init_from_pretrain_model="${SAVED_MODELS}/matching_pretrained/params/step_final" \
        --save_model_path="${save_model_path}" \
        --save_param="params" \
        --training_file="${INPUT_PATH}/label_data/${2}/train.ids" \
        --epoch=50 \
        --print_step=1 \
        --save_step=400 \
        --batch_size=256 \
        --hidden_size=256 \
        --emb_size=256 \
        --vocab_size=484016 \
        --learning_rate=0.001 \
        --sample_pro=0.1
}

#predict
# pretrain_predict USE_CUDA
function pretrain_predict()
{
    ${PYTHON_PATH} -u main.py \
        --do_predict=true \
        --use_cuda="${1}" \
        --predict_file="${INPUT_PATH}/unlabel_data/test.ids" \
        --init_from_params="${SAVED_MODELS}/trained_models/matching_pretrained/params" \
        --loss_type="CLS" \
        --output_prediction_file="${OUTPUT_PATH}/pretrain_matching_predict" \
        --max_seq_len=50 \
        --batch_size=256 \
        --hidden_size=256 \
        --emb_size=256 \
        --vocab_size=484016
}

# finetuning_predict USE_CUDA TRAIN_TYPE
function finetuning_predict()
{
    ${PYTHON_PATH} -u main.py \
        --do_predict=true \
        --use_cuda="${1}" \
        --predict_file="${INPUT_PATH}/label_data/${2}/test.ids" \
        --init_from_params="${SAVED_MODELS}/trained_models/${2}_finetuned/params" \
        --loss_type="L2" \
        --output_prediction_file="${OUTPUT_PATH}/finetuning_${2}_predict" \
        --max_seq_len=50 \
        --batch_size=256 \
        --hidden_size=256 \
        --emb_size=256 \
        --vocab_size=484016
}

#evaluate
# pretrain_eval USE_CUDA
function pretrain_eval()
{
    ${PYTHON_PATH} -u main.py \
        --do_eval=true \
        --use_cuda="${1}" \
        --evaluation_file="${INPUT_PATH}/unlabel_data/test.ids" \
        --output_prediction_file="${OUTPUT_PATH}/pretrain_matching_predict" \
        --loss_type="CLS"
}

# finetuning_eval USE_CUDA TRAIN_TYPE
function finetuning_eval()
{
    ${PYTHON_PATH} -u main.py \
        --do_eval=true \
        --use_cuda="${1}" \
        --evaluation_file="${INPUT_PATH}/label_data/${2}/test.ids" \
        --output_prediction_file="${OUTPUT_PATH}/finetuning_${2}_predict" \
        --loss_type="L2"
}

#inference model
# pretrain_infer USE_CUDA
function pretrain_infer()
{
    ${PYTHON_PATH} -u main.py \
        --do_save_inference_model=true \
        --use_cuda="${1}" \
        --init_from_params="${SAVED_MODELS}/trained_models/matching_pretrained/params" \
        --inference_model_dir="${INFERENCE_MODEL}/matching_inference_model"
}

# finetuning_infer USE_CUDA TRAIN_TYPE
function finetuning_infer()
{
    ${PYTHON_PATH} -u main.py \
        --do_save_inference_model=true \
        --use_cuda="${1}" \
        --init_from_params="${SAVED_MODELS}/trained_models/${2}_finetuned/params" \
        --inference_model_dir="${INFERENCE_MODEL}/${2}_inference_model"
}

if [ "${TASK_TYPE}" = "train" ]
then
    echo "train ${TRAIN_TYPE} start.........."
    if [ "${TRAIN_TYPE}" = "matching" ]
    then
        pretrain_train "${use_cuda}";
    else
        finetuning_train "${use_cuda}" "${TRAIN_TYPE}";
    fi
elif [ "${TASK_TYPE}" = "predict" ]
then
    echo "predict ${TRAIN_TYPE} start.........."
    if [ "${TRAIN_TYPE}" = "matching" ]
    then
        pretrain_predict "${use_cuda}";
    else
        finetuning_predict "${use_cuda}" "${TRAIN_TYPE}";
    fi
elif [ "${TASK_TYPE}" = "evaluate" ]
then
    echo "evaluate ${TRAIN_TYPE} start.........."
    if [ "${TRAIN_TYPE}" = "matching" ]
    then
        pretrain_eval "${use_cuda}";
    else
        finetuning_eval "${use_cuda}" "${TRAIN_TYPE}";
    fi
elif [ "${TASK_TYPE}" = "inference" ]
then
    echo "save ${TRAIN_TYPE} inference model start.........."
    if [ "${TRAIN_TYPE}" = "matching" ]
    then
        pretrain_infer "${use_cuda}";
    else
        finetuning_infer "${use_cuda}" "${TRAIN_TYPE}";
    fi
else
    exit 255
fi
class Net(object):
    '''Self-attention matching network.

    Embeds the response and every context turn, optionally adds positional
    encoding, runs both through the same stack of self-attention blocks,
    builds a word-by-word similarity tensor per stack level, aggregates it
    with a 3d cnn and trains with gradients clipped by value.

    Attributes:
        conf: a configuration paramaters dict
        word_embedding_init: a 2-d array with shape [vocab_size+1, emb_size]
    '''
    def __init__(self, conf):
        self._graph = tf.Graph()
        self._conf = conf

        if self._conf['word_emb_init'] is not None:
            print('loading word emb init')
            # NOTE(review): pickle executes arbitrary code on load - only
            # point word_emb_init at trusted files.
            with open(self._conf['word_emb_init'], 'rb') as emb_file:
                self._word_embedding_init = pickle.load(emb_file)
        else:
            self._word_embedding_init = None

    def build_graph(self):
        '''Build the whole model inside self._graph and return that graph.

        Besides the placeholders this sets self.loss, self.logits,
        self.global_step, self.learning_rate, self.init, self.saver and
        self.g_updates (the clipped-gradient train op).
        '''
        with self._graph.as_default():
            rand_seed = self._conf['rand_seed']
            tf.set_random_seed(rand_seed)

            #word embedding
            if self._word_embedding_init is not None:
                word_embedding_initializer = tf.constant_initializer(self._word_embedding_init)
            else:
                word_embedding_initializer = tf.random_normal_initializer(stddev=0.1)

            self._word_embedding = tf.get_variable(
                name='word_embedding',
                shape=[self._conf['vocab_size']+1, self._conf['emb_size']],
                dtype=tf.float32,
                initializer=word_embedding_initializer)


            #define placehloders
            self.turns = tf.placeholder(
                tf.int32,
                shape=[self._conf["batch_size"], self._conf["max_turn_num"], self._conf["max_turn_len"]])

            self.tt_turns_len = tf.placeholder(
                tf.int32,
                shape=[self._conf["batch_size"]])

            self.every_turn_len = tf.placeholder(
                tf.int32,
                shape=[self._conf["batch_size"], self._conf["max_turn_num"]])

            self.response = tf.placeholder(
                tf.int32,
                shape=[self._conf["batch_size"], self._conf["max_turn_len"]])

            self.response_len = tf.placeholder(
                tf.int32,
                shape=[self._conf["batch_size"]])

            self.label = tf.placeholder(
                tf.float32,
                shape=[self._conf["batch_size"]])


            #define operations
            #response part
            Hr = tf.nn.embedding_lookup(self._word_embedding, self.response)

            if self._conf['is_positional'] and self._conf['stack_num'] > 0:
                with tf.variable_scope('positional'):
                    Hr = op.positional_encoding_vector(Hr, max_timescale=10)
            # level 0 of the stack is the (positionally encoded) embedding
            Hr_stack = [Hr]

            for index in range(self._conf['stack_num']):
                with tf.variable_scope('self_stack_' + str(index)):
                    Hr = layers.block(
                        Hr, Hr, Hr,
                        Q_lengths=self.response_len, K_lengths=self.response_len)
                    Hr_stack.append(Hr)

            Hr_stack = tf.stack(Hr_stack, axis=-1)


            #context part
            #a list of length max_turn_num, every element is a tensor with shape [batch, max_turn_len]
            list_turn_t = tf.unstack(self.turns, axis=1)
            list_turn_length = tf.unstack(self.every_turn_len, axis=1)

            sim_turns = []
            #for every turn_t calculate matching vector
            for turn_t, t_turn_length in zip(list_turn_t, list_turn_length):
                Hu = tf.nn.embedding_lookup(self._word_embedding, turn_t) #[batch, max_turn_len, emb_size]

                # reuse=True: every turn shares the variables created for
                # the response branch above
                if self._conf['is_positional'] and self._conf['stack_num'] > 0:
                    with tf.variable_scope('positional', reuse=True):
                        Hu = op.positional_encoding_vector(Hu, max_timescale=10)
                Hu_stack = [Hu]

                for index in range(self._conf['stack_num']):

                    with tf.variable_scope('self_stack_' + str(index), reuse=True):
                        Hu = layers.block(
                            Hu, Hu, Hu,
                            Q_lengths=t_turn_length, K_lengths=t_turn_length)

                        Hu_stack.append(Hu)

                Hu_stack = tf.stack(Hu_stack, axis=-1)

                #calculate similarity matrix
                with tf.variable_scope('similarity'):
                    # sim shape [batch, max_turn_len, max_turn_len, stack_num+1]
                    # divide sqrt(200) to prevent gradient explosion
                    # NOTE(review): 200 is presumably emb_size - confirm
                    # before training with a different embedding width
                    sim = tf.einsum('biks,bjks->bijs', Hu_stack, Hr_stack) / tf.sqrt(200.0)

                sim_turns.append(sim)


            #cnn and aggregation
            sim = tf.stack(sim_turns, axis=1)
            print('sim shape: %s' %sim.shape)
            with tf.variable_scope('cnn_aggregation'):
                final_info = layers.CNN_3d(sim, 32, 16)
                #for douban
                #final_info = layers.CNN_3d(sim, 16, 16)


            #loss and train
            with tf.variable_scope('loss'):
                self.loss, self.logits = layers.loss(final_info, self.label)

                self.global_step = tf.Variable(0, trainable=False)
                initial_learning_rate = self._conf['learning_rate']
                self.learning_rate = tf.train.exponential_decay(
                    initial_learning_rate,
                    global_step=self.global_step,
                    decay_steps=400,
                    decay_rate=0.9,
                    staircase=True)

                Optimizer = tf.train.AdamOptimizer(self.learning_rate)
                # NOTE(review): this unclipped train op does not advance
                # global_step; the op built for training is self.g_updates.
                # Kept so the graph content stays unchanged.
                self.optimizer = Optimizer.minimize(self.loss)

                self.init = tf.global_variables_initializer()
                self.saver = tf.train.Saver(max_to_keep = self._conf["max_to_keep"])
                self.all_variables = tf.global_variables()
                self.all_operations = self._graph.get_operations()
                self.grads_and_vars = Optimizer.compute_gradients(self.loss)

                # report variables that receive no gradient from the loss
                for grad, var in self.grads_and_vars:
                    if grad is None:
                        print(var)

                self.capped_gvs = [(tf.clip_by_value(grad, -1, 1), var) for grad, var in self.grads_and_vars]
                self.g_updates = Optimizer.apply_gradients(
                    self.capped_gvs,
                    global_step=self.global_step)

        return self._graph
def do_train(args):
    """train function

    Builds the training network, optionally restores a checkpoint or a
    pretrained model and initial word embeddings, then trains for
    ``args.epoch`` epochs, printing every ``args.print_steps`` steps and
    saving every ``args.save_steps`` steps and once more at the end.

    Args:
        args: parsed configuration; this function reads random_seed,
            learning_rate, use_cuda, training_file, max_seq_len, batch_size,
            sample_pro, epoch, init_from_checkpoint,
            init_from_pretrain_model, word_emb_init, print_steps,
            save_steps, save_checkpoint, save_param and enable_ce.

    Raises:
        AssertionError: if both ``init_from_checkpoint`` and
            ``init_from_pretrain_model`` are non-empty.
    """

    train_prog = fluid.default_main_program()
    startup_prog = fluid.default_startup_program()

    with fluid.program_guard(train_prog, startup_prog):
        train_prog.random_seed = args.random_seed
        startup_prog.random_seed = args.random_seed

        with fluid.unique_name.guard():
            context_wordseq = fluid.data(
                name='context_wordseq', shape=[-1, 1], dtype='int64', lod_level=1)
            response_wordseq = fluid.data(
                name='response_wordseq', shape=[-1, 1], dtype='int64', lod_level=1)
            labels = fluid.data(
                name='labels', shape=[-1, 1], dtype='int64')

            input_inst = [context_wordseq, response_wordseq, labels]
            input_field = InputField(input_inst)
            data_reader = fluid.io.PyReader(feed_list=input_inst,
                        capacity=4, iterable=False)

            loss = create_net(
                is_training=True,
                model_input=input_field,
                args=args
            )
            loss.persistable = True
            # gradient clipping
            fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByValue(
                max=1.0, min=-1.0))
            optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
            optimizer.minimize(loss)

            if args.use_cuda:
                dev_count = fluid.core.get_cuda_device_count()
                place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
            else:
                dev_count = int(os.environ.get('CPU_NUM', 1))
                place = fluid.CPUPlace()

            processor = reader.DataProcessor(
                data_path=args.training_file,
                max_seq_length=args.max_seq_len,
                batch_size=args.batch_size)

            batch_generator = processor.data_generator(
                place=place,
                phase="train",
                shuffle=True,
                sample_pro=args.sample_pro)

            num_train_examples = processor.get_num_examples(phase='train')
            max_train_steps = args.epoch * num_train_examples // dev_count // args.batch_size

            print("Num train examples: %d" % num_train_examples)
            print("Max train steps: %d" % max_train_steps)

    data_reader.decorate_batch_generator(batch_generator)

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    assert (args.init_from_checkpoint == "") or (
        args.init_from_pretrain_model == "")

    #init from some checkpoint, to resume the previous training
    if args.init_from_checkpoint:
        save_load_io.init_from_checkpoint(args, exe, train_prog)
    #init from some pretrain models, to better solve the current task
    if args.init_from_pretrain_model:
        save_load_io.init_from_pretrain_model(args, exe, train_prog)

    if args.word_emb_init:
        print("start loading word embedding init ...")
        # NOTE(review): pickle executes arbitrary code on load - only point
        # word_emb_init at trusted files.
        # context manager so the embedding file handle is always closed
        with io.open(args.word_emb_init, 'rb') as emb_file:
            if six.PY2:
                raw_emb = pickle.load(emb_file)
            else:
                raw_emb = pickle.load(emb_file, encoding="bytes")
        word_emb = np.array(raw_emb).astype('float32')
        set_word_embedding(word_emb, place)
        print("finish init word embedding ...")

    build_strategy = fluid.compiler.BuildStrategy()
    build_strategy.enable_inplace = True

    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
                loss_name=loss.name, build_strategy=build_strategy)

    steps = 0
    begin_time = time.time()
    time_begin = time.time()
    fetch_list = [loss.name]
    # defined up front so the enable_ce report below also works when
    # args.epoch is 0 and the loop body never runs
    ce_loss = 0.0

    for epoch_step in range(args.epoch):
        data_reader.start()
        sum_loss = 0.0
        ce_loss = 0.0
        while True:
            try:
                outputs = exe.run(compiled_train_prog, fetch_list=fetch_list)
                np_loss = outputs
                sum_loss += np.array(np_loss).mean()
                ce_loss = np.array(np_loss).mean()

                if steps % args.print_steps == 0:
                    time_end = time.time()
                    used_time = time_end - time_begin
                    current_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                                 time.localtime(time.time()))
                    print('%s epoch: %d, step: %s, avg loss %s, speed: %f steps/s' % (current_time, epoch_step, steps, sum_loss / args.print_steps, args.print_steps / used_time))
                    sum_loss = 0.0
                    time_begin = time.time()

                if steps % args.save_steps == 0:
                    if args.save_checkpoint:
                        save_load_io.save_checkpoint(args, exe, train_prog, "step_" + str(steps))
                    if args.save_param:
                        save_load_io.save_param(args, exe, train_prog, "step_" + str(steps))
                steps += 1
            except fluid.core.EOFException:
                # the reader raises EOFException at the end of every epoch
                data_reader.reset()
                break

    if args.save_checkpoint:
        save_load_io.save_checkpoint(args, exe, train_prog, "step_final")
    if args.save_param:
        save_load_io.save_param(args, exe, train_prog, "step_final")

    def get_cards():
        """Count the devices listed in CUDA_VISIBLE_DEVICES (0 if unset)."""
        num = 0
        cards = os.environ.get('CUDA_VISIBLE_DEVICES', '')
        if cards != '':
            num = len(cards.split(","))
        return num

    if args.enable_ce:
        card_num = get_cards()
        pass_time_cost = time.time() - begin_time
        print("test_card_num", card_num)
        print("kpis\ttrain_duration_card%s\t%s" % (card_num, pass_time_cost))
        print("kpis\ttrain_loss_card%s\t%f" % (card_num, ce_loss))
11 | 12 | Attributes: 13 | conf: a configuration paramaters dict 14 | word_embedding_init: a 2-d array with shape [vocab_size+1, emb_size] 15 | ''' 16 | def __init__(self, conf): 17 | self._graph = tf.Graph() 18 | self._conf = conf 19 | 20 | if self._conf['word_emb_init'] is not None: 21 | print('loading word emb init') 22 | self._word_embedding_init = pickle.load(open(self._conf['word_emb_init'], 'rb')) 23 | else: 24 | self._word_embedding_init = None 25 | 26 | def build_graph(self): 27 | with self._graph.as_default(): 28 | rand_seed = self._conf['rand_seed'] 29 | tf.set_random_seed(rand_seed) 30 | 31 | #word embedding 32 | if self._word_embedding_init is not None: 33 | word_embedding_initializer = tf.constant_initializer(self._word_embedding_init) 34 | else: 35 | word_embedding_initializer = tf.random_normal_initializer(stddev=0.1) 36 | 37 | self._word_embedding = tf.get_variable( 38 | name='word_embedding', 39 | shape=[self._conf['vocab_size']+1, self._conf['emb_size']], 40 | dtype=tf.float32, 41 | initializer=word_embedding_initializer) 42 | 43 | 44 | #define placehloders 45 | self.turns = tf.placeholder( 46 | tf.int32, 47 | shape=[self._conf["batch_size"], self._conf["max_turn_num"], self._conf["max_turn_len"]]) 48 | 49 | self.tt_turns_len = tf.placeholder( 50 | tf.int32, 51 | shape=[self._conf["batch_size"]]) 52 | 53 | self.every_turn_len = tf.placeholder( 54 | tf.int32, 55 | shape=[self._conf["batch_size"], self._conf["max_turn_num"]]) 56 | 57 | self.response = tf.placeholder( 58 | tf.int32, 59 | shape=[self._conf["batch_size"], self._conf["max_turn_len"]]) 60 | 61 | self.response_len = tf.placeholder( 62 | tf.int32, 63 | shape=[self._conf["batch_size"]]) 64 | 65 | self.label = tf.placeholder( 66 | tf.float32, 67 | shape=[self._conf["batch_size"]]) 68 | 69 | 70 | #define operations 71 | #response part 72 | Hr = tf.nn.embedding_lookup(self._word_embedding, self.response) 73 | 74 | if self._conf['is_positional'] and self._conf['stack_num'] > 0: 75 | with 
tf.variable_scope('positional'): 76 | Hr = op.positional_encoding_vector(Hr, max_timescale=10) 77 | 78 | for index in range(self._conf['stack_num']): 79 | with tf.variable_scope('self_stack_' + str(index)): 80 | Hr = layers.block( 81 | Hr, Hr, Hr, 82 | Q_lengths=self.response_len, K_lengths=self.response_len) 83 | 84 | #context part 85 | #a list of length max_turn_num, every element is a tensor with shape [batch, max_turn_len] 86 | list_turn_t = tf.unstack(self.turns, axis=1) 87 | list_turn_length = tf.unstack(self.every_turn_len, axis=1) 88 | 89 | sim_turns = [] 90 | #for every turn_t calculate matching vector 91 | for turn_t, t_turn_length in zip(list_turn_t, list_turn_length): 92 | Hu = tf.nn.embedding_lookup(self._word_embedding, turn_t) #[batch, max_turn_len, emb_size] 93 | 94 | if self._conf['is_positional'] and self._conf['stack_num'] > 0: 95 | with tf.variable_scope('positional', reuse=True): 96 | Hu = op.positional_encoding_vector(Hu, max_timescale=10) 97 | 98 | for index in range(self._conf['stack_num']): 99 | 100 | with tf.variable_scope('self_stack_' + str(index), reuse=True): 101 | Hu = layers.block( 102 | Hu, Hu, Hu, 103 | Q_lengths=t_turn_length, K_lengths=t_turn_length) 104 | 105 | 106 | 107 | with tf.variable_scope('u_attentd_r_' + str(index)): 108 | try: 109 | u_a_r = layers.block( 110 | Hu, Hr, Hr, 111 | Q_lengths=t_turn_length, K_lengths=self.response_len) 112 | except ValueError: 113 | tf.get_variable_scope().reuse_variables() 114 | u_a_r = layers.block( 115 | Hu, Hr, Hr, 116 | Q_lengths=t_turn_length, K_lengths=self.response_len) 117 | 118 | 119 | with tf.variable_scope('r_attend_u_' + str(index)): 120 | try: 121 | r_a_u = layers.block( 122 | Hr, Hu, Hu, 123 | Q_lengths=self.response_len, K_lengths=t_turn_length) 124 | except ValueError: 125 | tf.get_variable_scope().reuse_variables() 126 | r_a_u = layers.block( 127 | Hr, Hu, Hu, 128 | Q_lengths=self.response_len, K_lengths=t_turn_length) 129 | 130 | u_a_r = tf.stack([u_a_r, Hu], axis=-1) 131 
| r_a_u = tf.stack([r_a_u, Hr], axis=-1) 132 | 133 | #calculate similarity matrix 134 | with tf.variable_scope('similarity'): 135 | # sim shape [batch, max_turn_len, max_turn_len, 2*stack_num+1] 136 | # divide sqrt(200) to prevent gradient explosion 137 | sim = tf.einsum('biks,bjks->bijs', r_a_u, u_a_r) / tf.sqrt(200.0) 138 | 139 | sim_turns.append(sim) 140 | 141 | 142 | #cnn and aggregation 143 | sim = tf.stack(sim_turns, axis=1) 144 | print('sim shape: %s' %sim.shape) 145 | with tf.variable_scope('cnn_aggregation'): 146 | final_info = layers.CNN_3d(sim, 32, 16) 147 | #for douban 148 | #final_info = layers.CNN_3d(sim, 16, 16) 149 | 150 | #loss and train 151 | with tf.variable_scope('loss'): 152 | self.loss, self.logits = layers.loss(final_info, self.label) 153 | 154 | self.global_step = tf.Variable(0, trainable=False) 155 | initial_learning_rate = self._conf['learning_rate'] 156 | self.learning_rate = tf.train.exponential_decay( 157 | initial_learning_rate, 158 | global_step=self.global_step, 159 | decay_steps=400, 160 | decay_rate=0.9, 161 | staircase=True) 162 | 163 | Optimizer = tf.train.AdamOptimizer(self.learning_rate) 164 | self.optimizer = Optimizer.minimize(self.loss) 165 | 166 | self.init = tf.global_variables_initializer() 167 | self.saver = tf.train.Saver(max_to_keep = self._conf["max_to_keep"]) 168 | self.all_variables = tf.global_variables() 169 | self.all_operations = self._graph.get_operations() 170 | self.grads_and_vars = Optimizer.compute_gradients(self.loss) 171 | 172 | for grad, var in self.grads_and_vars: 173 | if grad is None: 174 | print var 175 | 176 | self.capped_gvs = [(tf.clip_by_value(grad, -1, 1), var) for grad, var in self.grads_and_vars] 177 | self.g_updates = Optimizer.apply_gradients( 178 | self.capped_gvs, 179 | global_step=self.global_step) 180 | 181 | return self._graph 182 | 183 | -------------------------------------------------------------------------------- /DGU/dgu/batching.py: 
def mask(batch_tokens, total_token_num, vocab_size, CLS=1, SEP=2, MASK=3):
    """
    Add mask for batch_tokens, return out, mask_label, mask_pos;
    Note: mask_pos responding the batch_tokens after padded;

    One random number is drawn per token. A non-special token is selected
    when its number is <= 0.15; it is then replaced by ``MASK``
    (0.03 < p <= 0.15), replaced by a random id (0.015 < p <= 0.03) or kept
    unchanged (p <= 0.015). ``CLS`` and ``SEP`` tokens are never selected.

    Args:
        batch_tokens: list of token-id lists; MODIFIED IN PLACE.
        total_token_num: number of random draws to prepare; must be at least
            the total number of tokens in ``batch_tokens``.
        vocab_size: exclusive upper bound for random replacement ids.
        CLS, SEP, MASK: ids of the special tokens.

    Returns:
        ``(batch_tokens, mask_label, mask_pos)`` where ``mask_label`` holds
        the original ids of the selected tokens and ``mask_pos`` their flat
        positions ``sent_index * max_len + token_index``; both are int64
        arrays of shape [-1, 1].
    """
    max_len = max([len(sent) for sent in batch_tokens])
    mask_label = []
    mask_pos = []
    prob_mask = np.random.rand(total_token_num)
    # Note: the first token is [CLS], so [low=1]
    replace_ids = np.random.randint(1, high=vocab_size, size=total_token_num)
    sent_offset = 0  # index of the current sentence's first token in prob_mask
    for sent_index, sent in enumerate(batch_tokens):
        mask_flag = False
        for token_index, token in enumerate(sent):
            prob = prob_mask[sent_offset + token_index]
            if prob > 0.15:
                continue
            if token == SEP or token == CLS:
                # special tokens are never masked, replaced or predicted
                continue
            mask_label.append(sent[token_index])
            mask_pos.append(sent_index * max_len + token_index)
            if 0.03 < prob <= 0.15:
                # mask
                sent[token_index] = MASK
                mask_flag = True
            elif 0.015 < prob <= 0.03:
                # random replace
                sent[token_index] = replace_ids[sent_offset + token_index]
                mask_flag = True
            # else: keep the original token (still predicted)
        sent_offset += len(sent)

        # ensure at least mask one word in a sentence
        if not mask_flag:
            # Without this check a sentence of length <= 2 made randint raise
            # ValueError, and a sentence whose interior holds only special
            # tokens made the loop below spin forever.
            has_maskable = any(
                tok != SEP and tok != CLS for tok in sent[1:-1])
            while has_maskable and not mask_flag:
                # index the size-1 draw explicitly: int() on a 1-element
                # array is deprecated in recent NumPy releases
                token_index = int(
                    np.random.randint(1, high=len(sent) - 1, size=1)[0])
                if sent[token_index] != SEP and sent[token_index] != CLS:
                    mask_label.append(sent[token_index])
                    sent[token_index] = MASK
                    mask_flag = True
                    mask_pos.append(sent_index * max_len + token_index)
    mask_label = np.array(mask_label).astype("int64").reshape([-1, 1])
    mask_pos = np.array(mask_pos).astype("int64").reshape([-1, 1])
    return batch_tokens, mask_label, mask_pos


def prepare_batch_data(task_name,
                       insts,
                       max_len,
                       total_token_num,
                       voc_size=0,
                       pad_id=None,
                       cls_id=None,
                       sep_id=None,
                       mask_id=None,
                       return_input_mask=True,
                       return_max_len=True,
                       return_num_token=False):
    """
    1. generate Tensor of data
    2. generate Tensor of position
    3. generate self attention mask, [shape: batch_size * max_len * max_len]

    Args:
        task_name: ``"atis_slot"`` and ``"dstc2"`` select the handling of
            list-valued labels; any other name leaves list labels out.
        insts: list of instances ``(src_ids, sent_ids, pos_ids, label...)``.
        max_len: padded sequence length (passed on to ``pad_batch_data``).
        total_token_num: forwarded to ``mask``.
        voc_size, cls_id, sep_id: forwarded to ``mask``.
        pad_id: id used for padding.
        mask_id: masking is applied only when this is a non-negative id;
            ``None`` (the default) or a negative value disables it.
        return_input_mask, return_max_len, return_num_token: accepted for
            interface compatibility; not read by this function.

    Returns:
        ``[src_id, pos_id, sent_id, self_input_mask] + labels`` or, with
        masking enabled, ``[src_id, pos_id, sent_id, self_input_mask,
        mask_label, mask_pos] + labels``.
    """
    batch_src_ids = [inst[0] for inst in insts]
    batch_sent_ids = [inst[1] for inst in insts]
    batch_pos_ids = [inst[2] for inst in insts]
    labels_list = []
    # compatible with squad, whose example includes start/end positions,
    # or unique id

    # Guard the index so label-free instances no longer raise IndexError.
    has_list_label = len(insts[0]) > 3 and isinstance(insts[0][3], list)
    if has_list_label:
        if task_name == "atis_slot":
            # per-token labels: pad each sequence with 0 up to max_len
            labels_list = [inst[3] + [0] * (max_len - len(inst[3])) for inst in insts]
            labels_list = [np.array(labels_list).astype("int64").reshape([-1, max_len])]
        elif task_name == "dstc2":
            labels_list = [inst[3] for inst in insts]
            labels_list = [np.array(labels_list).astype("int64")]
    else:
        for i in range(3, len(insts[0]), 1):
            labels = [inst[i] for inst in insts]
            labels = np.array(labels).astype("int64").reshape([-1, 1])
            labels_list.append(labels)

    # `None >= 0` raises TypeError on Python 3, so the default mask_id=None
    # has to be tested explicitly.
    use_mask = mask_id is not None and mask_id >= 0

    # First step: do mask without padding
    if use_mask:
        out, mask_label, mask_pos = mask(
            batch_src_ids,
            total_token_num,
            vocab_size=voc_size,
            CLS=cls_id,
            SEP=sep_id,
            MASK=mask_id)
    else:
        out = batch_src_ids
    # Second step: padding
    src_id, self_input_mask = pad_batch_data(
        out,
        max_len,
        pad_idx=pad_id,
        return_input_mask=True)
    pos_id = pad_batch_data(
        batch_pos_ids,
        max_len,
        pad_idx=pad_id,
        return_pos=False,
        return_input_mask=False)
    sent_id = pad_batch_data(
        batch_sent_ids,
        max_len,
        pad_idx=pad_id,
        return_pos=False,
        return_input_mask=False)

    if use_mask:
        return_list = [
            src_id, pos_id, sent_id, self_input_mask, mask_label, mask_pos
        ] + labels_list
    else:
        return_list = [src_id, pos_id, sent_id, self_input_mask] + labels_list

    return return_list if len(return_list) > 1 else return_list[0]


def pad_batch_data(insts,
                   max_len_in,
                   pad_idx=0,
                   return_pos=False,
                   return_input_mask=False,
                   return_max_len=False,
                   return_num_token=False):
    """
    Pad the instances to the max sequence length in batch, and generate the
    corresponding position data and attention bias.

    Args:
        insts: list of token-id lists.
        max_len_in: target length; ``-1`` means "longest instance in insts".
        pad_idx: value appended to short instances.
        return_pos: also return position ids (0..len-1, padded with pad_idx).
        return_input_mask: also return a float32 mask of shape
            [batch, max_len, 1] with 1 on real tokens and 0 on padding.
        return_max_len: also return the padded length.
        return_num_token: also return the number of unpadded tokens.

    Returns:
        The padded int64 array of shape [batch, max_len] when nothing else is
        requested, otherwise a list starting with it followed by the
        requested extras in the order of the flags above.
    """
    return_list = []
    max_len = max_len_in if max_len_in != -1 else max(len(inst) for inst in insts)
    # Any token included in dict can be used to pad, since the paddings' loss
    # will be masked out by weights and make no effect on parameter gradients.

    inst_data = np.array(
        [inst + [pad_idx] * (max_len - len(inst)) for inst in insts])
    return_list += [inst_data.astype("int64").reshape([-1, max_len])]

    # position data
    if return_pos:
        inst_pos = np.array([
            list(range(0, len(inst))) + [pad_idx] * (max_len - len(inst))
            for inst in insts
        ])

        return_list += [inst_pos.astype("int64").reshape([-1, max_len])]

    if return_input_mask:
        # This is used to avoid attention on paddings.
        input_mask_data = np.array([[1] * len(inst) + [0] *
                                    (max_len - len(inst)) for inst in insts])
        input_mask_data = np.expand_dims(input_mask_data, axis=-1)
        return_list += [input_mask_data.astype("float32")]

    if return_max_len:
        return_list += [max_len]

    if return_num_token:
        return_list += [sum(len(inst) for inst in insts)]

    return return_list if len(return_list) > 1 else return_list[0]
class SWDA(object):
    """
    dialogue act dataset swda data process

    Reads the SWDA utterance csv files listed in the split configuration,
    cleans each utterance text, maps its dialogue-act tag to an integer id
    and writes train/dev/test files plus the tag-to-id table.
    """

    # Tags that are kept verbatim (no "^" suffix stripping, no merging).
    _KEPT_TAGS = ("qy_d", "qw^d", "b^m")

    # Fine-grained tags that are folded into one shared label.
    _MERGED_TAGS = {
        'qr': 'qy', 'qy': 'qy',
        'fe': 'ba', 'ba': 'ba',
        'oo': 'oo_co_cc', 'co': 'oo_co_cc', 'cc': 'oo_co_cc',
        'fx': 'sv', 'sv': 'sv',
        'aap': 'aap_am', 'am': 'aap_am',
        'arp': 'arp_nd', 'nd': 'arp_nd',
        'fo': 'fo_o_fw_"_by_bc', 'o': 'fo_o_fw_"_by_bc',
        'fw': 'fo_o_fw_"_by_bc', '"': 'fo_o_fw_"_by_bc',
        'by': 'fo_o_fw_"_by_bc', 'bc': 'fo_o_fw_"_by_bc',
    }

    def __init__(self):
        """
        init instance
        """
        self.tag_id = 0  # next free integer id for a new tag
        self.map_tag_dict = dict()  # tag -> integer id, filled by _map_tag
        self.out_dir = "../../data/input/data/swda"
        self.data_list = "./conf/swda.conf"
        self.map_tag = "../../data/input/data/swda/map_tag_id.txt"
        self.src_dir = "../../data/input/data/swda/source_data/swda"
        self._load_file()

    def _load_file(self):
        """
        load dataset filename

        Fills ``self.data_dict`` from the split configuration and
        ``self.file_dict`` with ``"sw<id>" -> path`` for every source file,
        where ``<id>`` is the last "_"-separated field of the file name
        before its first ".".
        """
        self.data_dict = commonlib.load_dict(self.data_list)
        self.file_dict = {}
        child_dir = commonlib.get_dir_list(self.src_dir)
        for chd in child_dir:
            file_list, file_path = commonlib.get_file_list(chd)
            for name, path in zip(file_list, file_path):
                keyword = "sw%s" % name.split('.')[0].split('_')[-1]
                self.file_dict[keyword] = path

    def _parser_dataset(self, data_type):
        """
        parser train dev test dataset

        Writes ``<out_dir>/<data_type>.txt`` with one converted utterance per
        line; the first row of every source csv is skipped as its header.
        """
        out_filename = "%s/%s.txt" % (self.out_dir, data_type)
        # Context managers make sure both files are flushed and closed; the
        # handles used to be left open.
        with io.open(out_filename, 'w', encoding='utf8') as fw:
            for name in self.data_dict[data_type]:
                file_path = self.file_dict[name]
                with io.open(file_path, 'r', encoding="utf8") as fr:
                    rows = csv.reader(fr, delimiter=',')
                    for idx, row in enumerate(rows):
                        if idx == 0:
                            continue
                        out = self._parser_utterence(row)
                        fw.write(u"%s\n" % out)

    def _clean_text(self, text):
        """
        text cleaning for dialogue act dataset

        Removes the transcription markup ("[ a + b ]", "{F uh }",
        "(( text ))", "<noise>"), re-attaches split clitics and returns the
        lower-cased text. A text that is only a "<...>." marker is returned
        unchanged.
        """
        if text.startswith('<') and text.endswith('>.'):
            return text

        # "[ a + b ]": drop the enclosing brackets and the "+".
        # Every pass removes at least one "+", so the loop terminates.
        group = re.findall(r"\[.*?\+.*?\]", text)
        while group:
            for elem in group:
                cleaned = elem.lstrip('[').rstrip(']').replace('+', '')
                text = text.replace(elem, cleaned)
            group = re.findall(r"\[.*?\+.*?\]", text)

        # "{F uh }": keep only what follows the upper-case marker.
        # The replacement is derived from the matched element itself; the
        # old code paired two independent findall() results by index, which
        # went out of step as soon as a "{lowercase}" group was present.
        group = re.findall(r"\{[A-Z].*?\}", text)
        while group:
            for elem in group:
                inner = re.match(r"\{[A-Z]*(.*?)\}", elem).group(1)
                text = text.replace(elem, inner)
            group = re.findall(r"\{[A-Z].*?\}", text)

        # "(( text ))": drop every parenthesis of the group.
        # The same (non-capturing) pattern is used on every pass; refreshing
        # with a capturing pattern could return text without parentheses
        # and loop forever.
        # NOTE(review): the "mumblex" replacement of the old code could not
        # be reached and is not emitted; confirm whether empty "(( ))"
        # groups were meant to become "mumblex".
        group = re.findall(r"\(\(.*?\)\)", text)
        while group:
            for elem in group:
                text = text.replace(elem, re.sub(r"\(|\)", "", elem))
            group = re.findall(r"\(\(.*?\)\)", text)

        # "<noise>" markers are removed entirely.
        for elem in re.findall(r"<.*?>", text):
            text = text.replace(elem, "")

        # Re-attach clitics that the transcription separated with a space.
        text = re.sub(r" 's", "'s", text)
        text = re.sub(r" n't", "n't", text)
        text = re.sub(r" 't", "'t", text)
        text = re.sub(" +", " ", text)
        # Trailing "/" or "\" characters, then surrounding "-".
        text = text.rstrip('\\/').strip().strip('-')
        # Whatever markup characters are left over.
        text = re.sub(r"[\[\]+><{}]", "", text)
        return text.strip().lower()

    def _map_tag(self, da_tag):
        """
        map tag to 42 classes

        Only the first tag of a ","/";"-separated annotation is used. The tag
        is normalised and merged, registered in ``self.map_tag_dict`` if new,
        and its integer id is returned.
        """
        curr_da = re.split(r"\s*[,;]\s*", da_tag)[0]
        if curr_da in self._KEPT_TAGS:
            pass
        elif curr_da == "nn^e":
            curr_da = "ng"
        elif curr_da == "ny^e":
            curr_da = "na"
        else:
            # strip everything from the first "^" on, then stray markers
            curr_da = re.sub(r'(.)\^.*', r'\1', curr_da)
            curr_da = re.sub(r'[\(\)@*]', '', curr_da)
            curr_da = self._MERGED_TAGS.get(curr_da, curr_da)
        if curr_da not in self.map_tag_dict:
            self.map_tag_dict[curr_da] = self.tag_id
            self.tag_id += 1
        return self.map_tag_dict[curr_da]

    def _parser_utterence(self, line):
        """
        parser one turn dialogue

        ``line`` is one csv row; columns 2, 4, 5 and 8 are read as
        conversation number, act tag, caller and text. Returns
        "conversation_no<TAB>tag_id<TAB>caller<TAB>cleaned text".
        """
        conversation_no = line[2]
        act_tag = line[4]
        caller = line[5]
        text = line[8]
        text = self._clean_text(text)
        act_tag = self._map_tag(act_tag)

        out = "%s\t%s\t%s\t%s" % (conversation_no, act_tag, caller, text)
        return out

    def get_train_dataset(self):
        """
        parser train dataset and print train.txt
        """
        self._parser_dataset("train")

    def get_dev_dataset(self):
        """
        parser dev dataset and print dev.txt
        """
        self._parser_dataset("dev")

    def get_test_dataset(self):
        """
        parser test dataset and print test.txt
        """
        self._parser_dataset("test")

    def get_labels(self):
        """
        get tag and map ids file

        Writes one "tag<TAB>id" line per known tag to ``self.map_tag``.
        """
        with io.open(self.map_tag, 'w', encoding='utf8') as fw:
            for elem in self.map_tag_dict:
                fw.write(u"%s\t%s\n" % (elem, self.map_tag_dict[elem]))

    def main(self):
        """
        run data process
        """
        self.get_train_dataset()
        self.get_dev_dataset()
        self.get_test_dataset()
        self.get_labels()
11 | 12 | Attributes: 13 | conf: a configuration paramaters dict 14 | word_embedding_init: a 2-d array with shape [vocab_size+1, emb_size] 15 | ''' 16 | def __init__(self, conf): 17 | self._graph = tf.Graph() 18 | self._conf = conf 19 | 20 | if self._conf['word_emb_init'] is not None: 21 | print('loading word emb init') 22 | self._word_embedding_init = pickle.load(open(self._conf['word_emb_init'], 'rb')) 23 | else: 24 | self._word_embedding_init = None 25 | 26 | def build_graph(self): 27 | with self._graph.as_default(): 28 | rand_seed = self._conf['rand_seed'] 29 | tf.set_random_seed(rand_seed) 30 | 31 | #word embedding 32 | if self._word_embedding_init is not None: 33 | word_embedding_initializer = tf.constant_initializer(self._word_embedding_init) 34 | else: 35 | word_embedding_initializer = tf.random_normal_initializer(stddev=0.1) 36 | 37 | self._word_embedding = tf.get_variable( 38 | name='word_embedding', 39 | shape=[self._conf['vocab_size']+1, self._conf['emb_size']], 40 | dtype=tf.float32, 41 | initializer=word_embedding_initializer) 42 | 43 | 44 | #define placehloders 45 | self.turns = tf.placeholder( 46 | tf.int32, 47 | shape=[self._conf["batch_size"], self._conf["max_turn_num"], self._conf["max_turn_len"]]) 48 | 49 | self.tt_turns_len = tf.placeholder( 50 | tf.int32, 51 | shape=[self._conf["batch_size"]]) 52 | 53 | self.every_turn_len = tf.placeholder( 54 | tf.int32, 55 | shape=[self._conf["batch_size"], self._conf["max_turn_num"]]) 56 | 57 | self.response = tf.placeholder( 58 | tf.int32, 59 | shape=[self._conf["batch_size"], self._conf["max_turn_len"]]) 60 | 61 | self.response_len = tf.placeholder( 62 | tf.int32, 63 | shape=[self._conf["batch_size"]]) 64 | 65 | self.label = tf.placeholder( 66 | tf.float32, 67 | shape=[self._conf["batch_size"]]) 68 | 69 | 70 | #define operations 71 | #response part 72 | Hr = tf.nn.embedding_lookup(self._word_embedding, self.response) 73 | 74 | if self._conf['is_positional'] and self._conf['stack_num'] > 0: 75 | with 
tf.variable_scope('positional'): 76 | Hr = op.positional_encoding_vector(Hr, max_timescale=10) 77 | Hr_stack = [Hr] 78 | 79 | for index in range(self._conf['stack_num']): 80 | with tf.variable_scope('self_stack_' + str(index)): 81 | Hr = layers.block( 82 | Hr, Hr, Hr, 83 | Q_lengths=self.response_len, K_lengths=self.response_len) 84 | Hr_stack.append(Hr) 85 | 86 | 87 | #context part 88 | #a list of length max_turn_num, every element is a tensor with shape [batch, max_turn_len] 89 | list_turn_t = tf.unstack(self.turns, axis=1) 90 | list_turn_length = tf.unstack(self.every_turn_len, axis=1) 91 | 92 | sim_turns = [] 93 | #for every turn_t calculate matching vector 94 | for turn_t, t_turn_length in zip(list_turn_t, list_turn_length): 95 | Hu = tf.nn.embedding_lookup(self._word_embedding, turn_t) #[batch, max_turn_len, emb_size] 96 | 97 | if self._conf['is_positional'] and self._conf['stack_num'] > 0: 98 | with tf.variable_scope('positional', reuse=True): 99 | Hu = op.positional_encoding_vector(Hu, max_timescale=10) 100 | Hu_stack = [Hu] 101 | 102 | for index in range(self._conf['stack_num']): 103 | 104 | with tf.variable_scope('self_stack_' + str(index), reuse=True): 105 | Hu = layers.block( 106 | Hu, Hu, Hu, 107 | Q_lengths=t_turn_length, K_lengths=t_turn_length) 108 | 109 | Hu_stack.append(Hu) 110 | 111 | 112 | 113 | r_a_t_stack = [] 114 | t_a_r_stack = [] 115 | for index in range(self._conf['stack_num']+1): 116 | 117 | with tf.variable_scope('t_attend_r_' + str(index)): 118 | try: 119 | t_a_r = layers.block( 120 | Hu_stack[index], Hr_stack[index], Hr_stack[index], 121 | Q_lengths=t_turn_length, K_lengths=self.response_len) 122 | except ValueError: 123 | tf.get_variable_scope().reuse_variables() 124 | t_a_r = layers.block( 125 | Hu_stack[index], Hr_stack[index], Hr_stack[index], 126 | Q_lengths=t_turn_length, K_lengths=self.response_len) 127 | 128 | 129 | with tf.variable_scope('r_attend_t_' + str(index)): 130 | try: 131 | r_a_t = layers.block( 132 | Hr_stack[index], 
Hu_stack[index], Hu_stack[index], 133 | Q_lengths=self.response_len, K_lengths=t_turn_length) 134 | except ValueError: 135 | tf.get_variable_scope().reuse_variables() 136 | r_a_t = layers.block( 137 | Hr_stack[index], Hu_stack[index], Hu_stack[index], 138 | Q_lengths=self.response_len, K_lengths=t_turn_length) 139 | 140 | t_a_r_stack.append(t_a_r) 141 | r_a_t_stack.append(r_a_t) 142 | 143 | 144 | t_a_r = tf.stack(t_a_r_stack, axis=-1) 145 | r_a_t = tf.stack(r_a_t_stack, axis=-1) 146 | 147 | 148 | #calculate similarity matrix 149 | with tf.variable_scope('similarity'): 150 | # sim shape [batch, max_turn_len, max_turn_len, 2*stack_num+1] 151 | # divide sqrt(200) to prevent gradient explosion 152 | sim = tf.einsum('biks,bjks->bijs', t_a_r, r_a_t) / tf.sqrt(200.0) 153 | 154 | sim_turns.append(sim) 155 | 156 | 157 | #cnn and aggregation 158 | sim = tf.stack(sim_turns, axis=1) 159 | print('sim shape: %s' %sim.shape) 160 | with tf.variable_scope('cnn_aggregation'): 161 | final_info = layers.CNN_3d(sim, 32, 16) 162 | #for douban 163 | #final_info = layers.CNN_3d(sim, 16, 16) 164 | 165 | 166 | #loss and train 167 | with tf.variable_scope('loss'): 168 | self.loss, self.logits = layers.loss(final_info, self.label) 169 | 170 | self.global_step = tf.Variable(0, trainable=False) 171 | initial_learning_rate = self._conf['learning_rate'] 172 | self.learning_rate = tf.train.exponential_decay( 173 | initial_learning_rate, 174 | global_step=self.global_step, 175 | decay_steps=400, 176 | decay_rate=0.9, 177 | staircase=True) 178 | 179 | Optimizer = tf.train.AdamOptimizer(self.learning_rate) 180 | self.optimizer = Optimizer.minimize( 181 | self.loss, 182 | global_step=self.global_step) 183 | 184 | self.init = tf.global_variables_initializer() 185 | self.saver = tf.train.Saver(max_to_keep = self._conf["max_to_keep"]) 186 | self.all_variables = tf.global_variables() 187 | self.all_operations = self._graph.get_operations() 188 | self.grads_and_vars = Optimizer.compute_gradients(self.loss) 
class Net(object):
    '''Add positional encoding(initializer lambda is 0),
       cross-attention, cnn integrated and grad clip by value.

    Attributes:
        conf: a configuration parameters dict
        word_embedding_init: a 2-d array with shape [vocab_size+1, emb_size]
    '''
    def __init__(self, conf):
        '''Create an empty graph and load the optional embedding table.

        Args:
            conf: configuration dict. ``conf['word_emb_init']`` is either
                None or the path of a pickled embedding table.
        '''
        self._graph = tf.Graph()
        self._conf = conf

        if self._conf['word_emb_init'] is not None:
            print('loading word emb init')
            # NOTE(review): unpickling can execute arbitrary code; the path
            # must point at a trusted file.
            # The context manager closes the file, which the previous inline
            # open(...) never did.
            with open(self._conf['word_emb_init'], 'rb') as emb_file:
                self._word_embedding_init = pickle.load(emb_file)
        else:
            self._word_embedding_init = None

    def _attend(self, scope_name, query, key_value, Q_lengths, K_lengths):
        '''Run one attention block, creating its variables on first use.

        ``build_graph`` calls this once per context turn with the same scope
        names. When ``layers.block`` raises ValueError the scope is switched
        to reuse mode and the block is built again, which is how the original
        inline code shared the weights across turns.
        '''
        with tf.variable_scope(scope_name):
            try:
                return layers.block(
                    query, key_value, key_value,
                    Q_lengths=Q_lengths, K_lengths=K_lengths)
            except ValueError:
                tf.get_variable_scope().reuse_variables()
                return layers.block(
                    query, key_value, key_value,
                    Q_lengths=Q_lengths, K_lengths=K_lengths)

    def build_graph(self):
        '''Build the matching network, its loss and its training ops.

        Placeholders (``turns``, ``tt_turns_len``, ``every_turn_len``,
        ``response``, ``response_len``, ``label``) and the training handles
        (``loss``, ``logits``, ``global_step``, ``learning_rate``,
        ``optimizer``, ``init``, ``saver``, ``grads_and_vars``,
        ``capped_gvs``, ``g_updates``) are stored on the instance.

        Returns:
            The populated ``tf.Graph``.
        '''
        with self._graph.as_default():
            if self._conf['rand_seed'] is not None:
                rand_seed = self._conf['rand_seed']
                tf.set_random_seed(rand_seed)
                print('set tf random seed: %s' %self._conf['rand_seed'])

            #word embedding
            if self._word_embedding_init is not None:
                word_embedding_initializer = tf.constant_initializer(self._word_embedding_init)
            else:
                word_embedding_initializer = tf.random_normal_initializer(stddev=0.1)

            self._word_embedding = tf.get_variable(
                name='word_embedding',
                shape=[self._conf['vocab_size']+1, self._conf['emb_size']],
                dtype=tf.float32,
                initializer=word_embedding_initializer)


            #define placeholders
            self.turns = tf.placeholder(
                tf.int32,
                shape=[self._conf["batch_size"], self._conf["max_turn_num"], self._conf["max_turn_len"]])

            self.tt_turns_len = tf.placeholder(
                tf.int32,
                shape=[self._conf["batch_size"]])

            self.every_turn_len = tf.placeholder(
                tf.int32,
                shape=[self._conf["batch_size"], self._conf["max_turn_num"]])

            self.response = tf.placeholder(
                tf.int32,
                shape=[self._conf["batch_size"], self._conf["max_turn_len"]])

            self.response_len = tf.placeholder(
                tf.int32,
                shape=[self._conf["batch_size"]])

            self.label = tf.placeholder(
                tf.float32,
                shape=[self._conf["batch_size"]])


            #define operations
            #response part
            Hr = tf.nn.embedding_lookup(self._word_embedding, self.response)

            if self._conf['is_positional'] and self._conf['stack_num'] > 0:
                with tf.variable_scope('positional'):
                    Hr = op.positional_encoding_vector(Hr, max_timescale=10)
            # Hr_stack[i] is the response representation after i self-attention layers
            Hr_stack = [Hr]

            for index in range(self._conf['stack_num']):
                with tf.variable_scope('self_stack_' + str(index)):
                    Hr = layers.block(
                        Hr, Hr, Hr,
                        Q_lengths=self.response_len, K_lengths=self.response_len)
                    Hr_stack.append(Hr)


            #context part
            #a list of length max_turn_num, every element is a tensor with shape [batch, max_turn_len]
            list_turn_t = tf.unstack(self.turns, axis=1)
            list_turn_length = tf.unstack(self.every_turn_len, axis=1)

            sim_turns = []
            #for every turn_t calculate matching vector
            for turn_t, t_turn_length in zip(list_turn_t, list_turn_length):
                Hu = tf.nn.embedding_lookup(self._word_embedding, turn_t) #[batch, max_turn_len, emb_size]

                if self._conf['is_positional'] and self._conf['stack_num'] > 0:
                    with tf.variable_scope('positional', reuse=True):
                        Hu = op.positional_encoding_vector(Hu, max_timescale=10)
                Hu_stack = [Hu]

                # the turn encoder reuses the response encoder's variables
                for index in range(self._conf['stack_num']):

                    with tf.variable_scope('self_stack_' + str(index), reuse=True):
                        Hu = layers.block(
                            Hu, Hu, Hu,
                            Q_lengths=t_turn_length, K_lengths=t_turn_length)

                        Hu_stack.append(Hu)



                r_a_t_stack = []
                t_a_r_stack = []
                for index in range(self._conf['stack_num']+1):

                    # turn attends to response
                    t_a_r = self._attend(
                        't_attend_r_' + str(index),
                        Hu_stack[index], Hr_stack[index],
                        Q_lengths=t_turn_length, K_lengths=self.response_len)

                    # response attends to turn
                    r_a_t = self._attend(
                        'r_attend_t_' + str(index),
                        Hr_stack[index], Hu_stack[index],
                        Q_lengths=self.response_len, K_lengths=t_turn_length)

                    t_a_r_stack.append(t_a_r)
                    r_a_t_stack.append(r_a_t)

                # self-attention representations are matched as well
                t_a_r_stack.extend(Hu_stack)
                r_a_t_stack.extend(Hr_stack)

                t_a_r = tf.stack(t_a_r_stack, axis=-1)
                r_a_t = tf.stack(r_a_t_stack, axis=-1)


                #calculate similarity matrix
                with tf.variable_scope('similarity'):
                    # both stacks hold 2*(stack_num+1) representations here, so
                    # the last axis of sim has 2*(stack_num+1) entries
                    # divide sqrt(200) to prevent gradient explosion
                    # (presumably 200 is the embedding size -- TODO confirm)
                    sim = tf.einsum('biks,bjks->bijs', t_a_r, r_a_t) / tf.sqrt(200.0)

                sim_turns.append(sim)


            #cnn and aggregation
            sim = tf.stack(sim_turns, axis=1)
            print('sim shape: %s' %sim.shape)
            with tf.variable_scope('cnn_aggregation'):
                final_info = layers.CNN_3d(sim, 32, 16)
                #for douban
                #final_info = layers.CNN_3d(sim, 16, 16)

            #loss and train
            with tf.variable_scope('loss'):
                self.loss, self.logits = layers.loss(final_info, self.label)

            self.global_step = tf.Variable(0, trainable=False)
            initial_learning_rate = self._conf['learning_rate']
            self.learning_rate = tf.train.exponential_decay(
                initial_learning_rate,
                global_step=self.global_step,
                decay_steps=400,
                decay_rate=0.9,
                staircase=True)

            Optimizer = tf.train.AdamOptimizer(self.learning_rate)
            self.optimizer = Optimizer.minimize(
                self.loss,
                global_step=self.global_step)

            self.init = tf.global_variables_initializer()
            self.saver = tf.train.Saver(max_to_keep = self._conf["max_to_keep"])
            self.all_variables = tf.global_variables()
            self.all_operations = self._graph.get_operations()
            self.grads_and_vars = Optimizer.compute_gradients(self.loss)

            # report variables the loss does not depend on
            for grad, var in self.grads_and_vars:
                if grad is None:
                    print(var)

            # Variables without a gradient are left out: tf.clip_by_value
            # cannot take None, so one such variable used to abort graph
            # construction right after being reported above.
            self.capped_gvs = [
                (tf.clip_by_value(grad, -1, 1), var)
                for grad, var in self.grads_and_vars
                if grad is not None]
            self.g_updates = Optimizer.apply_gradients(
                self.capped_gvs,
                global_step=self.global_step)

        return self._graph
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""BERT model."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import io
import sys
import six
import json
import numpy as np
import paddle.fluid as fluid

from dgu.transformer_encoder import encoder, pre_process_layer


class BertConfig(object):
    """Read-only view over a BERT hyper-parameter file stored as JSON.

    The file is parsed once at construction time; values are then looked up
    with ``config[key]``.
    """

    def __init__(self, config_path):
        """Load the configuration found at ``config_path``.

        Args:
            config_path: path of the JSON configuration file.

        Raises:
            IOError: if the file cannot be opened or parsed.
        """
        self._config_dict = self._parse(config_path)

    def _parse(self, config_path):
        """Return the object decoded from the JSON file at ``config_path``.

        Args:
            config_path: path of the JSON configuration file.

        Returns:
            Whatever ``json.load`` produces for the file content.

        Raises:
            IOError: on any failure while opening, reading or decoding the
                file. Every failure is reported as ``IOError`` so callers
                only have one exception type to handle.
        """
        try:
            # The context manager closes the handle on both the success and
            # the failure path; a bare io.open() here would leak it.
            with io.open(config_path, 'r', encoding="utf8") as json_file:
                config_dict = json.load(json_file)
        except Exception:
            raise IOError("Error in parsing bert model config file '%s'" %
                          config_path)
        return config_dict

    def __getitem__(self, key):
        """Return the configuration value stored under ``key``."""
        return self._config_dict[key]

    def print_config(self):
        """Print every ``key: value`` pair, sorted, followed by a ruler."""
        for arg, value in sorted(six.iteritems(self._config_dict)):
            print('%s: %s' % (arg, value))
        print('------------------------------------------------')


class BertModel(object):
    """Builds the BERT encoder graph with PaddlePaddle fluid layers.

    The whole encoder is built inside ``__init__``; the ``get_*`` methods
    expose the encoder output or stack task heads on top of it.
    """

    def __init__(self,
                 src_ids,
                 position_ids,
                 sentence_ids,
                 input_mask,
                 config,
                 weight_sharing=True,
                 use_fp16=False):
        """Read the hyper-parameters from ``config`` and build the encoder.

        Args:
            src_ids: input of the word embedding lookup.
            position_ids: input of the position embedding lookup.
            sentence_ids: input of the sentence-type embedding lookup.
            input_mask: mask used to derive the self-attention bias.
            config: object indexable by key (e.g. ``BertConfig``) providing
                ``hidden_size``, ``num_hidden_layers``,
                ``num_attention_heads``, ``vocab_size``,
                ``max_position_embeddings``, ``type_vocab_size``,
                ``hidden_act``, ``hidden_dropout_prob``,
                ``attention_probs_dropout_prob`` and ``initializer_range``.
            weight_sharing: when true, the masked-LM output projection in
                ``get_pretraining_output`` reuses the word embedding table.
            use_fp16: when true, parameters are created as ``float16``
                instead of ``float32``.
        """

        self._emb_size = config['hidden_size']
        self._n_layer = config['num_hidden_layers']
        self._n_head = config['num_attention_heads']
        self._voc_size = config['vocab_size']
        self._max_position_seq_len = config['max_position_embeddings']
        self._sent_types = config['type_vocab_size']
        self._hidden_act = config['hidden_act']
        self._prepostprocess_dropout = config['hidden_dropout_prob']
        self._attention_dropout = config['attention_probs_dropout_prob']
        self._weight_sharing = weight_sharing

        # Parameter names are fixed strings; the word embedding is looked up
        # again by this name when weights are shared with the LM head.
        self._word_emb_name = "word_embedding"
        self._pos_emb_name = "pos_embedding"
        self._sent_emb_name = "sent_embedding"
        self._dtype = "float16" if use_fp16 else "float32"

        # Initialize all weights by truncated normal initializer, and all biases
        # will be initialized by constant zero by default.
        self._param_initializer = fluid.initializer.TruncatedNormal(
            scale=config['initializer_range'])

        self._build_model(src_ids, position_ids, sentence_ids, input_mask)

    def _build_model(self, src_ids, position_ids, sentence_ids, input_mask):
        """Sum the three embeddings and run them through the encoder.

        The encoder output is stored in ``self._enc_out``; nothing is
        returned.
        """
        # padding id in vocabulary must be set to 0
        emb_out = fluid.input.embedding(
            input=src_ids,
            size=[self._voc_size, self._emb_size],
            dtype=self._dtype,
            param_attr=fluid.ParamAttr(
                name=self._word_emb_name, initializer=self._param_initializer),
            is_sparse=False)
        position_emb_out = fluid.input.embedding(
            input=position_ids,
            size=[self._max_position_seq_len, self._emb_size],
            dtype=self._dtype,
            param_attr=fluid.ParamAttr(
                name=self._pos_emb_name, initializer=self._param_initializer))

        sent_emb_out = fluid.input.embedding(
            sentence_ids,
            size=[self._sent_types, self._emb_size],
            dtype=self._dtype,
            param_attr=fluid.ParamAttr(
                name=self._sent_emb_name, initializer=self._param_initializer))

        emb_out = emb_out + position_emb_out
        emb_out = emb_out + sent_emb_out

        # NOTE(review): 'nd' presumably selects layer norm then dropout in
        # pre_process_layer -- confirm against dgu.transformer_encoder.
        emb_out = pre_process_layer(
            emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder')

        if self._dtype == "float16":
            input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype)

        # mask x mask^T, then 10000 * (value - 1) because the bias is applied
        # before the scale. Assuming a 0/1 mask, kept pairs become 0 and
        # masked pairs become -10000 -- TODO confirm the mask encoding.
        self_attn_mask = fluid.layers.matmul(
            x=input_mask, y=input_mask, transpose_y=True)
        self_attn_mask = fluid.layers.scale(
            x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False)
        # One copy of the bias per attention head, stacked on a new axis 1.
        n_head_self_attn_mask = fluid.layers.stack(
            x=[self_attn_mask] * self._n_head, axis=1)
        n_head_self_attn_mask.stop_gradient = True

        self._enc_out = encoder(
            enc_input=emb_out,
            attn_bias=n_head_self_attn_mask,
            n_layer=self._n_layer,
            n_head=self._n_head,
            d_key=self._emb_size // self._n_head,
            d_value=self._emb_size // self._n_head,
            d_model=self._emb_size,
            d_inner_hid=self._emb_size * 4,
            prepostprocess_dropout=self._prepostprocess_dropout,
            attention_dropout=self._attention_dropout,
            relu_dropout=0,
            hidden_act=self._hidden_act,
            preprocess_cmd="",
            postprocess_cmd="dan",
            param_initializer=self._param_initializer,
            name='encoder')

    def get_sequence_output(self):
        """Return the encoder output built by ``_build_model``."""
        return self._enc_out

    def get_pooled_output(self):
        """Get the first feature of each sequence for classification.

        Takes index 0 along axis 1 of the encoder output and passes it
        through a ``tanh`` fully connected layer of width ``hidden_size``.
        """

        next_sent_feat = fluid.layers.slice(
            input=self._enc_out, axes=[1], starts=[0], ends=[1])
        next_sent_feat = fluid.layers.fc(
            input=next_sent_feat,
            size=self._emb_size,
            act="tanh",
            param_attr=fluid.ParamAttr(
                name="pooled_fc.w_0", initializer=self._param_initializer),
            bias_attr="pooled_fc.b_0")
        return next_sent_feat

    def get_pretraining_output(self, mask_label, mask_pos, labels):
        """Get the loss & accuracy for pretraining.

        Args:
            mask_label: labels for the masked-token cross entropy.
            mask_pos: indices (cast to int32 here) into the encoder output
                flattened to rows of width ``hidden_size``.
            labels: labels for the two-way next-sentence classifier.

        Returns:
            A tuple ``(next_sent_acc, mean_mask_lm_loss, loss)`` where
            ``loss`` is the sum of the mean next-sentence loss and the mean
            masked-LM loss.
        """

        mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32')

        # extract the first token feature in each sentence
        next_sent_feat = self.get_pooled_output()
        reshaped_emb_out = fluid.layers.reshape(
            x=self._enc_out, shape=[-1, self._emb_size])
        # extract masked tokens' feature
        mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos)

        # transform: fc
        mask_trans_feat = fluid.layers.fc(
            input=mask_feat,
            size=self._emb_size,
            act=self._hidden_act,
            param_attr=fluid.ParamAttr(
                name='mask_lm_trans_fc.w_0',
                initializer=self._param_initializer),
            bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0'))
        # transform: layer norm
        mask_trans_feat = pre_process_layer(
            mask_trans_feat, 'n', name='mask_lm_trans')

        mask_lm_out_bias_attr = fluid.ParamAttr(
            name="mask_lm_out_fc.b_0",
            initializer=fluid.initializer.Constant(value=0.0))
        if self._weight_sharing:
            # Reuse the word embedding table (fetched by name from the main
            # program) as the output projection, plus a separate bias.
            fc_out = fluid.layers.matmul(
                x=mask_trans_feat,
                y=fluid.default_main_program().global_block().var(
                    self._word_emb_name),
                transpose_y=True)
            fc_out += fluid.layers.create_parameter(
                shape=[self._voc_size],
                dtype=self._dtype,
                attr=mask_lm_out_bias_attr,
                is_bias=True)

        else:
            fc_out = fluid.layers.fc(input=mask_trans_feat,
                                     size=self._voc_size,
                                     param_attr=fluid.ParamAttr(
                                         name="mask_lm_out_fc.w_0",
                                         initializer=self._param_initializer),
                                     bias_attr=mask_lm_out_bias_attr)

        mask_lm_loss = fluid.layers.softmax_with_cross_entropy(
            logits=fc_out, label=mask_label)
        mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss)

        next_sent_fc_out = fluid.layers.fc(
            input=next_sent_feat,
            size=2,
            param_attr=fluid.ParamAttr(
                name="next_sent_fc.w_0", initializer=self._param_initializer),
            bias_attr="next_sent_fc.b_0")

        next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy(
            logits=next_sent_fc_out, label=labels, return_softmax=True)

        next_sent_acc = fluid.layers.accuracy(
            input=next_sent_softmax, label=labels)

        mean_next_sent_loss = fluid.layers.mean(next_sent_loss)

        loss = mean_next_sent_loss + mean_mask_lm_loss
        return next_sent_acc, mean_mask_lm_loss, loss