├── ADE
│   ├── ade
│   │   ├── __init__.py
│   │   ├── utils
│   │   │   ├── __init__.py
│   │   │   ├── input_field.py
│   │   │   ├── model_check.py
│   │   │   └── save_load_io.py
│   │   ├── evaluate.py
│   │   ├── prepare_data_and_model.py
│   │   └── reader.py
│   ├── data
│   │   ├── input
│   │   │   └── input.md
│   │   ├── output
│   │   │   └── output.md
│   │   ├── inference_models
│   │   │   └── inference_models.md
│   │   ├── saved_models
│   │   │   └── saved_models.md
│   │   ├── pretrain_model
│   │   │   └── pretrain_model.md
│   │   └── config
│   │       └── ade.yaml
│   ├── main.py
│   ├── .run_ce.sh
│   ├── _ce.py
│   ├── eval.py
│   ├── inference_model.py
│   ├── ade_net.py
│   ├── predict.py
│   ├── run.sh
│   └── train.py
├── DAM
│   ├── bin
│   │   ├── __init__.py
│   │   ├── test_and_evaluate.py
│   │   └── train_and_evaluate.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── self_match_net.py
│   │   ├── last_net.py
│   │   ├── cross_match_net.py
│   │   └── net.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── evaluation.py
│   │   ├── douban_evaluation.py
│   │   └── reader.py
│   ├── log
│   │   └── ReadMe.txt
│   ├── output
│   │   └── ReadMe.txt
│   ├── run.sh
│   ├── appendix
│   │   ├── Figure1.png
│   │   └── Figure2.png
│   ├── data
│   │   └── ReadMe.txt
│   ├── main.py
│   └── README.md
├── DGU
│   ├── dgu
│   │   ├── __init__.py
│   │   ├── utils
│   │   │   ├── __init__.py
│   │   │   ├── py23.py
│   │   │   ├── input_field.py
│   │   │   ├── model_check.py
│   │   │   ├── fp16.py
│   │   │   └── save_load_io.py
│   │   ├── scripts
│   │   │   ├── README.md
│   │   │   ├── conf
│   │   │   │   └── mrda.conf
│   │   │   ├── run_build_data.py
│   │   │   ├── commonlib.py
│   │   │   ├── build_mrda_dataset.py
│   │   │   ├── build_dstc2_dataset.py
│   │   │   ├── build_atis_dataset.py
│   │   │   └── build_swda_dataset.py
│   │   ├── prepare_data_and_model.py
│   │   ├── define_predict_pack.py
│   │   ├── optimization.py
│   │   ├── define_paradigm.py
│   │   ├── batching.py
│   │   └── bert.py
│   ├── data
│   │   ├── input
│   │   │   └── input.md
│   │   ├── output
│   │   │   └── output.md
│   │   ├── inference_models
│   │   │   └── inference_models.md
│   │   ├── saved_models
│   │   │   └── saved_models.md
│   │   ├── pretrain_model
│   │   │   └── pretrain_model.md
│   │   └── config
│   │       └── dgu.yaml
│   ├── images
│   │   └── dgu.png
│   ├── eval.py
│   ├── main.py
│   ├── dgu_net.py
│   ├── .run_ce.sh
│   ├── _ce.py
│   ├── inference_model.py
│   ├── run.sh
│   └── predict.py
├── other
│   └── ld.jpg
└── README.md

--------------------------------------------------------------------------------
/ADE/ade/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/DAM/bin/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/DAM/models/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/DAM/utils/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/DGU/dgu/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/ADE/ade/utils/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/DGU/dgu/utils/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/DAM/log/ReadMe.txt:
--------------------------------------------------------------------------------
store logs here.

--------------------------------------------------------------------------------
/DAM/output/ReadMe.txt:
--------------------------------------------------------------------------------
store models here.

--------------------------------------------------------------------------------
/ADE/data/input/input.md:
--------------------------------------------------------------------------------
training data directory

--------------------------------------------------------------------------------
/DGU/data/input/input.md:
--------------------------------------------------------------------------------
directory for the input training and test data

--------------------------------------------------------------------------------
/ADE/data/output/output.md:
--------------------------------------------------------------------------------
output directory for saved prediction results

--------------------------------------------------------------------------------
/DGU/data/output/output.md:
--------------------------------------------------------------------------------
output directory for saved prediction results

--------------------------------------------------------------------------------
/other/ld.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/baidu/Dialogue/HEAD/other/ld.jpg

--------------------------------------------------------------------------------
/ADE/data/inference_models/inference_models.md:
--------------------------------------------------------------------------------
directory for saved inference models

--------------------------------------------------------------------------------
/DAM/run.sh:
--------------------------------------------------------------------------------
#!/bin/bash
CUDA_VISIBLE_DEVICES=0 python main.py

--------------------------------------------------------------------------------
/DGU/data/inference_models/inference_models.md:
--------------------------------------------------------------------------------
directory for saved inference models

--------------------------------------------------------------------------------
/DGU/images/dgu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/baidu/Dialogue/HEAD/DGU/images/dgu.png

--------------------------------------------------------------------------------
/DAM/appendix/Figure1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/baidu/Dialogue/HEAD/DAM/appendix/Figure1.png

--------------------------------------------------------------------------------
/DAM/appendix/Figure2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/baidu/Dialogue/HEAD/DAM/appendix/Figure2.png

--------------------------------------------------------------------------------
/ADE/data/saved_models/saved_models.md:
--------------------------------------------------------------------------------
directory for user fine-tuned models and the trained models we provide

--------------------------------------------------------------------------------
/DGU/data/saved_models/saved_models.md:
--------------------------------------------------------------------------------
directory for user fine-tuned models and the trained models we provide

--------------------------------------------------------------------------------
/ADE/data/pretrain_model/pretrain_model.md:
--------------------------------------------------------------------------------
pre-trained model directory: the model used for network initialization

--------------------------------------------------------------------------------
/DGU/data/pretrain_model/pretrain_model.md:
--------------------------------------------------------------------------------
pre-trained model directory: this module uses BERT as the pre-trained model

--------------------------------------------------------------------------------
/DAM/data/ReadMe.txt:
--------------------------------------------------------------------------------
please download data from:

https://pan.baidu.com/s/1hakfuuwdS8xl7NyxlWzRiQ

and unzip it in this folder.

--------------------------------------------------------------------------------
/ADE/data/config/ade.yaml:
--------------------------------------------------------------------------------
loss_type: "CLS"
training_file: ""
val_file: ""
predict_file: ""
print_steps: 10
save_steps: 10
num_scan_data: ""
word_emb_init: ""
init_model: ""
use_cuda: True
batch_size: 256
hidden_size: 256
emb_size: 256
vocab_size: 484016
sample_pro: 1.0
output_prediction_file: ""
init_from_checkpoint: ""
init_from_params: ""
init_from_pretrain_model: ""
inference_model_dir: ""
save_model_path: ""
save_checkpoint: ""
save_param: ""
evaluation_file: ""
vocab_path: ""
max_seq_len: 128
random_seed: 110
do_save_inference_model: False
enable_ce: ""

--------------------------------------------------------------------------------
/DGU/data/config/dgu.yaml:
--------------------------------------------------------------------------------
task_name: ""
data_dir: ""
bert_config_path: ""
init_from_checkpoint: ""
init_from_params: ""
init_from_pretrain_model: ""
inference_model_dir: ""
save_model_path: ""
save_checkpoint: ""
save_param: ""
lr_scheduler: "linear_warmup_decay"
weight_decay: 0.01
warmup_proportion: 0.1
save_steps: 1000
use_fp16: False
loss_scaling: 1.0
print_steps: 20
evaluation_file: ""
output_prediction_file: ""
vocab_path: ""
max_seq_len: 128
batch_size: 2
verbose: False
do_lower_case: False
random_seed: 0
use_cuda: True
in_tokens: False
do_save_inference_model: False
enable_ce: ""
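Every entry point in ADE and DGU resolves its arguments from these YAML files through the PDConfig helper (see ADE/main.py and DGU/main.py further down), with command-line flags overriding the YAML defaults. A minimal usage sketch of that pattern, assuming the ade.yaml defaults above:

# Minimal sketch of the PDConfig pattern used by ADE/main.py; the attribute
# reads below assume the ade.yaml defaults shown above.
from ade.utils.configure import PDConfig

args = PDConfig(yaml_file="./data/config/ade.yaml")
args.build()    # merge the YAML defaults with any command-line overrides
args.Print()    # dump the resolved configuration
print(args.batch_size, args.max_seq_len)   # -> 256 128 (YAML defaults)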
--------------------------------------------------------------------------------
/DGU/dgu/scripts/README.md:
--------------------------------------------------------------------------------
scripts: data-processing scripts that convert the official public datasets into the training-data format required by the model.
Usage:
python run_build_data.py [udc|swda|mrda|atis|dstc2]

1) To build the train/dev/test sets for the MATCHING task:
python run_build_data.py udc
The generated data is placed under dialogue_general_understanding/data/input/data/udc

2) To build the train/dev/test sets for the DA task:
python run_build_data.py swda
python run_build_data.py mrda
The generated data is placed under dialogue_general_understanding/data/input/data/swda and dialogue_general_understanding/data/input/data/mrda respectively

3) To build the train/dev/test sets for the DST task:
python run_build_data.py dstc2
The generated data is placed under dialogue_general_understanding/data/input/data/dstc2

4) To build the train/dev/test sets for the intent-detection and slot-filling tasks:
python run_build_data.py atis
Slot-filling data is placed under dialogue_general_understanding/data/input/data/atis/atis_slot
Intent-detection data is placed under dialogue_general_understanding/data/input/data/atis/atis_intent

--------------------------------------------------------------------------------
/DGU/dgu/utils/py23.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys

if sys.version[0] == '2':
    rt_tok = u'\n'
    tab_tok = u'\t'
    space_tok = u' '
else:
    rt_tok = '\n'
    tab_tok = '\t'
    space_tok = ' '

--------------------------------------------------------------------------------
/DAM/utils/evaluation.py:
--------------------------------------------------------------------------------
import sys


def get_p_at_n_in_m(data, n, m, ind):
    pos_score = data[ind][0]
    curr = data[ind:ind + m]
    curr = sorted(curr, key=lambda x: x[0], reverse=True)

    if curr[n - 1][0] <= pos_score:
        return 1
    return 0


def evaluate(file_path):
    data = []
    with open(file_path, 'r') as file:
        for line in file:
            line = line.strip()
            tokens = line.split("\t")

            if len(tokens) != 2:
                continue

            data.append((float(tokens[0]), int(tokens[1])))

    #assert len(data) % 10 == 0

    p_at_1_in_2 = 0.0
    p_at_1_in_10 = 0.0
    p_at_2_in_10 = 0.0
    p_at_5_in_10 = 0.0

    length = int(len(data) / 10)

    for i in xrange(0, length):
        ind = i * 10
        assert data[ind][1] == 1

        p_at_1_in_2 += get_p_at_n_in_m(data, 1, 2, ind)
        p_at_1_in_10 += get_p_at_n_in_m(data, 1, 10, ind)
        p_at_2_in_10 += get_p_at_n_in_m(data, 2, 10, ind)
        p_at_5_in_10 += get_p_at_n_in_m(data, 5, 10, ind)

    return (p_at_1_in_2 / length, p_at_1_in_10 / length,
            p_at_2_in_10 / length, p_at_5_in_10 / length)
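The scorer assumes the prediction file packs candidates in blocks of ten, with the positive response first in each block. A small worked example with hypothetical scores, run from the DAM root under the Python 2 runtime the rest of DAM assumes:

# Hypothetical scores for one 10-candidate block; label 1 marks the positive
# response, which by convention is the first line of the block.
import utils.evaluation as eva

scores = [0.90, 0.95, 0.30, 0.20, 0.10, 0.05, 0.04, 0.03, 0.02, 0.01]
labels = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
with open("score.demo", "w") as f:
    for s, l in zip(scores, labels):
        f.write("%s\t%s\n" % (s, l))

# The positive is ranked 2nd of 10, and the lone negative in the 2-candidate
# setting outscores it, so: R2@1 = 0, R10@1 = 0, R10@2 = 1, R10@5 = 1.
print(eva.evaluate("score.demo"))  # -> (0.0, 0.0, 1.0, 1.0)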
--------------------------------------------------------------------------------
/ADE/ade/utils/input_field.py:
--------------------------------------------------------------------------------
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import six
import ast
import copy

import numpy as np
import paddle.fluid as fluid


class InputField(object):
    def __init__(self, input_field):
        """init input field"""
        self.context_wordseq = input_field[0]
        self.response_wordseq = input_field[1]
        self.labels = input_field[2]

--------------------------------------------------------------------------------
/DGU/eval.py:
--------------------------------------------------------------------------------
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""evaluation"""

import os
import sys

from dgu.evaluation import evaluate
from dgu.utils.configure import PDConfig


def do_eval(args):

    task_name = args.task_name.lower()
    reference = args.evaluation_file
    predictions = args.output_prediction_file

    evaluate(task_name, predictions, reference)


if __name__ == "__main__":

    args = PDConfig(yaml_file="./data/config/dgu.yaml")
    args.build()

    do_eval(args)

--------------------------------------------------------------------------------
/DGU/dgu/scripts/conf/mrda.conf:
--------------------------------------------------------------------------------
train Bdb001
train Bed002
train Bed004
train Bed005
train Bed008
train Bed009
train Bed011
train Bed013
train Bed014
train Bed015
train Bed017
train Bmr002
train Bmr003
train Bmr006
train Bmr007
train Bmr008
train Bmr009
train Bmr011
train Bmr012
train Bmr015
train Bmr016
train Bmr020
train Bmr021
train Bmr023
train Bmr025
train Bmr026
train Bmr027
train Bmr029
train Bmr031
train Bns001
train Bns002
train Bns003
train Bro003
train Bro005
train Bro007
train Bro010
train Bro012
train Bro013
train Bro015
train Bro016
train Bro017
train Bro019
train Bro022
train Bro023
train Bro025
train Bro026
train Bro028
train Bsr001
train Btr001
train Btr002
train Buw001
dev Bed003
dev Bed010
dev Bmr005
dev Bmr014
dev Bmr019
dev Bmr024
dev Bmr030
dev Bro004
dev Bro011
dev Bro018
dev Bro024
test Bed006
test Bed012
test Bed016
test Bmr001
test Bmr010
test Bmr022
test Bmr028
test Bro008
test Bro014
test Bro021
test Bro027

--------------------------------------------------------------------------------
/DGU/dgu/utils/input_field.py:
--------------------------------------------------------------------------------
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import six
import ast
import copy

import numpy as np
import paddle.fluid as fluid


class InputField(object):
    def __init__(self, input_field):
        """init input field"""
        self.src_ids = input_field[0]
        self.pos_ids = input_field[1]
        self.sent_ids = input_field[2]
        self.input_mask = input_field[3]
        self.labels = input_field[4]

--------------------------------------------------------------------------------
/DAM/main.py:
--------------------------------------------------------------------------------
import sys
import os
import time

import cPickle as pickle
import tensorflow as tf
import numpy as np

import utils.reader as reader
import models.net as net
import utils.evaluation as eva
#for douban
#import utils.douban_evaluation as eva

import bin.train_and_evaluate as train
import bin.test_and_evaluate as test

# configure

conf = {
    "data_path": "./data/ubuntu/data.pkl",
    "save_path": "./output/ubuntu/temp/",
    "word_emb_init": "./data/word_embedding.pkl",
    "init_model": None,  #should be set for test

    "rand_seed": None,

    "drop_dense": None,
    "drop_attention": None,

    "is_mask": True,
    "is_layer_norm": True,
    "is_positional": False,

    "stack_num": 5,
    "attention_type": "dot",

    "learning_rate": 1e-3,
    "vocab_size": 434512,
    "emb_size": 200,
    "batch_size": 256,  #200 for test

    "max_turn_num": 9,
    "max_turn_len": 50,

    "max_to_keep": 1,
    "num_scan_data": 2,
    "_EOS_": 28270,  #1 for douban data
    "final_n_class": 1,
}


model = net.Net(conf)
train.train(conf, model)

#test and evaluation, init_model in conf should be set
#test.test(conf, model)
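As the trailing comments note, testing reuses this same driver. A hypothetical test-mode variant (the checkpoint path is illustrative, not a file the repo ships):

# Hypothetical test-mode variant of DAM/main.py: point init_model at a saved
# checkpoint, then call test.test instead of train.train.
conf["init_model"] = "./output/ubuntu/temp/DAM.ckpt"  # illustrative path
conf["batch_size"] = 200  # the inline comment above suggests 200 for testing

model = net.Net(conf)
test.test(conf, model)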
--------------------------------------------------------------------------------
/DGU/main.py:
--------------------------------------------------------------------------------
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import sys
import numpy as np

import paddle
import paddle.fluid as fluid

from eval import do_eval
from train import do_train
from predict import do_predict
from inference_model import do_save_inference_model

from dgu.utils.configure import PDConfig


if __name__ == "__main__":

    args = PDConfig(yaml_file="./data/config/dgu.yaml")
    args.build()
    args.Print()

    if args.do_train:
        do_train(args)

    if args.do_predict:
        do_predict(args)

    if args.do_eval:
        do_eval(args)

    if args.do_save_inference_model:
        do_save_inference_model(args)

# vim: set ts=4 sw=4 sts=4 tw=100:

--------------------------------------------------------------------------------
/ADE/ade/utils/model_check.py:
--------------------------------------------------------------------------------
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import paddle
import paddle.fluid as fluid


def check_cuda(use_cuda, err = \
    "\nYou cannot set use_cuda = True in the model because you are using paddlepaddle-cpu.\n \
    Please: 1. Install paddlepaddle-gpu to run your models on GPU or 2. Set use_cuda = False to run models on CPU.\n"
    ):
    try:
        if use_cuda == True and fluid.is_compiled_with_cuda() == False:
            print(err)
            sys.exit(1)
    except Exception as e:
        pass


if __name__ == "__main__":

    check_cuda(True)

    check_cuda(False)

    check_cuda(True, "This is only for testing.")

--------------------------------------------------------------------------------
/DGU/dgu/utils/model_check.py:
--------------------------------------------------------------------------------
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import paddle
import paddle.fluid as fluid


def check_cuda(use_cuda, err = \
    "\nYou cannot set use_cuda = True in the model because you are using paddlepaddle-cpu.\n \
    Please: 1. Install paddlepaddle-gpu to run your models on GPU or 2. Set use_cuda = False to run models on CPU.\n"
    ):
    try:
        if use_cuda == True and fluid.is_compiled_with_cuda() == False:
            print(err)
            sys.exit(1)
    except Exception as e:
        pass


if __name__ == "__main__":

    check_cuda(True)

    check_cuda(False)

    check_cuda(True, "This is only for testing.")

--------------------------------------------------------------------------------
/ADE/main.py:
--------------------------------------------------------------------------------
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import sys
import numpy as np

import paddle
import paddle.fluid as fluid

from eval import do_eval
from train import do_train
from predict import do_predict
from inference_model import do_save_inference_model

from ade.utils.configure import PDConfig


if __name__ == "__main__":

    args = PDConfig(yaml_file="./data/config/ade.yaml")
    args.build()
    args.Print()

    if args.do_train:
        do_train(args)

    if args.do_predict:
        do_predict(args)

    if args.do_eval:
        do_eval(args)

    if args.do_save_inference_model:
        do_save_inference_model(args)

# vim: set ts=4 sw=4 sts=4 tw=100:

--------------------------------------------------------------------------------
/ADE/.run_ce.sh:
--------------------------------------------------------------------------------
#!/bin/bash

export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1.0

export CUDA_VISIBLE_DEVICES=0

pretrain_model_path="data/saved_models/matching_pretrained"
if [ ! -d ${pretrain_model_path} ]
then
    mkdir ${pretrain_model_path}
fi

python -u main.py \
    --do_train=true \
    --use_cuda=true \
    --loss_type="CLS" \
    --max_seq_len=50 \
    --save_model_path="data/saved_models/matching_pretrained" \
    --save_param="params" \
    --training_file="data/input/data/unlabel_data/train.ids" \
    --epoch=3 \
    --print_step=1 \
    --save_step=400 \
    --batch_size=256 \
    --hidden_size=256 \
    --emb_size=256 \
    --vocab_size=484016 \
    --learning_rate=0.001 \
    --sample_pro=0.1 \
    --enable_ce="store_true" | python _ce.py


export CUDA_VISIBLE_DEVICES=0,1,2,3

python -u main.py \
    --do_train=true \
    --use_cuda=true \
    --loss_type="CLS" \
    --max_seq_len=50 \
    --save_model_path="data/saved_models/matching_pretrained" \
    --save_param="params" \
    --training_file="data/input/data/unlabel_data/train.ids" \
    --epoch=3 \
    --print_step=1 \
    --save_step=400 \
    --batch_size=256 \
    --hidden_size=256 \
    --emb_size=256 \
    --vocab_size=484016 \
    --learning_rate=0.001 \
    --sample_pro=0.1 \
    --enable_ce="store_true" | python _ce.py

--------------------------------------------------------------------------------
/DGU/dgu/scripts/run_build_data.py:
--------------------------------------------------------------------------------
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import shutil
import sys
import os

from build_atis_dataset import ATIS
from build_dstc2_dataset import DSTC2
from build_mrda_dataset import MRDA
from build_swda_dataset import SWDA


if __name__ == "__main__":
    task_name = sys.argv[1]
    task_name = task_name.lower()

    if task_name not in ['swda', 'mrda', 'atis', 'dstc2', 'udc']:
        print("task name error: we support [swda|mrda|atis|dstc2|udc]")
        exit(1)

    if task_name == 'swda':
        swda_inst = SWDA()
        swda_inst.main()
    elif task_name == 'mrda':
        mrda_inst = MRDA()
        mrda_inst.main()
    elif task_name == 'atis':
        atis_inst = ATIS()
        atis_inst.main()
        shutil.copyfile("../../data/input/data/atis/atis_slot/test.txt",
                        "../../data/input/data/atis/atis_slot/dev.txt")
        shutil.copyfile("../../data/input/data/atis/atis_intent/test.txt",
                        "../../data/input/data/atis/atis_intent/dev.txt")
    elif task_name == 'dstc2':
        dstc_inst = DSTC2()
        dstc_inst.main()
    else:
        exit(0)

--------------------------------------------------------------------------------
/DGU/dgu_net.py:
--------------------------------------------------------------------------------
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Create model for dialogue task."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle.fluid as fluid

from dgu.bert import BertModel
from dgu.utils.configure import JsonConfig


def create_net(
        is_training,
        model_input,
        num_labels,
        paradigm_inst,
        args):
    """create dialogue task model"""

    src_ids = model_input.src_ids
    pos_ids = model_input.pos_ids
    sent_ids = model_input.sent_ids
    input_mask = model_input.input_mask
    labels = model_input.labels

    assert isinstance(args.bert_config_path, str)

    bert_conf = JsonConfig(args.bert_config_path)
    bert = BertModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        input_mask=input_mask,
        config=bert_conf,
        use_fp16=False)

    params = {'num_labels': num_labels,
              'src_ids': src_ids,
              'pos_ids': pos_ids,
              'sent_ids': sent_ids,
              'input_mask': input_mask,
              'labels': labels,
              'is_training': is_training}

    results = paradigm_inst.paradigm(bert, params)
    return results

--------------------------------------------------------------------------------
/DGU/dgu/scripts/commonlib.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""common function"""
import sys
import io
import os


def get_file_list(dir_name):
    """
    get file list in directory
    """
    file_list = list()
    file_path = list()
    for root, dirs, files in os.walk(dir_name):
        for file in files:
            file_list.append(file)
            file_path.append(os.path.join(root, file))
    return file_list, file_path


def get_dir_list(dir_name):
    """
    get directory names
    """
    child_dir = []
    dir_list = os.listdir(dir_name)
    for cur_file in dir_list:
        path = os.path.join(dir_name, cur_file)
        if not os.path.isdir(path):
            continue
        child_dir.append(path)
    return child_dir


def load_dict(conf):
    """
    load swda dataset config
    """
    conf_dict = dict()
    fr = io.open(conf, 'r', encoding="utf8")
    for line in fr:
        line = line.strip()
        elems = line.split('\t')
        if elems[0] not in conf_dict:
            conf_dict[elems[0]] = []
        conf_dict[elems[0]].append(elems[1])
    return conf_dict


def load_voc(conf):
    """
    load map dict
    """
    map_dict = {}
    fr = io.open(conf, 'r', encoding="utf8")
    for line in fr:
        line = line.strip()
        elems = line.split('\t')
        map_dict[elems[0]] = elems[1]
    return map_dict
14 | """Evaluation for auto dialogue evaluation""" 15 | 16 | import sys 17 | import numpy as np 18 | import pandas as pd 19 | 20 | 21 | def get_p_at_n_in_m(data, n, m, ind): 22 | """ 23 | Get n in m 24 | """ 25 | pos_score = data[ind][0] 26 | curr = data[ind:ind + m] 27 | curr = sorted(curr, key=lambda x: x[0], reverse=True) 28 | 29 | if curr[n - 1][0] <= pos_score: 30 | return 1 31 | return 0 32 | 33 | 34 | def evaluate_Recall(data): 35 | """ 36 | Evaluate Recall 37 | """ 38 | p_at_1_in_2 = 0.0 39 | p_at_1_in_10 = 0.0 40 | p_at_2_in_10 = 0.0 41 | p_at_5_in_10 = 0.0 42 | 43 | length = len(data) // 10 44 | print('length=%s' % length) 45 | 46 | for i in range(0, length): 47 | ind = i * 10 48 | assert data[ind][1] == 1 49 | 50 | p_at_1_in_2 += get_p_at_n_in_m(data, 1, 2, ind) 51 | p_at_1_in_10 += get_p_at_n_in_m(data, 1, 10, ind) 52 | p_at_2_in_10 += get_p_at_n_in_m(data, 2, 10, ind) 53 | p_at_5_in_10 += get_p_at_n_in_m(data, 5, 10, ind) 54 | 55 | recall_dict = { 56 | '1_in_2': p_at_1_in_2 / length, 57 | '1_in_10': p_at_1_in_10 / length, 58 | '2_in_10': p_at_2_in_10 / length, 59 | '5_in_10': p_at_5_in_10 / length 60 | } 61 | 62 | return recall_dict 63 | 64 | 65 | def evaluate_cor(pred, true): 66 | """ 67 | Evaluate cor 68 | """ 69 | df = pd.DataFrame({'pred': pred, 'true': true}) 70 | cor_matrix = df.corr('spearman') 71 | return cor_matrix['pred']['true'] 72 | -------------------------------------------------------------------------------- /DGU/.run_ce.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | train_atis_slot(){ 4 | if [ ! -d "./data/saved_models/atis_slot" ]; then 5 | mkdir "./data/saved_models/atis_slot" 6 | fi 7 | python -u train.py \ 8 | --task_name=atis_slot \ 9 | --use_cuda=true \ 10 | --do_train=true \ 11 | --in_tokens=false \ 12 | --epoch=2 \ 13 | --batch_size=32 \ 14 | --data_dir=./data/input/data/atis/atis_slot \ 15 | --bert_config_path=./data/pretrain_model/uncased_L-12_H-768_A-12/bert_config.json \ 16 | --vocab_path=./data/pretrain_model/uncased_L-12_H-768_A-12/vocab.txt \ 17 | --init_from_pretrain_model=./data/pretrain_model/uncased_L-12_H-768_A-12/params \ 18 | --save_model_path=./data/saved_models/atis_slot \ 19 | --save_param="params" \ 20 | --save_steps=100 \ 21 | --learning_rate=2e-5 \ 22 | --weight_decay=0.01 \ 23 | --max_seq_len=128 \ 24 | --print_steps=10 \ 25 | --use_fp16=false \ 26 | --enable_ce=store_true 27 | } 28 | 29 | train_mrda(){ 30 | if [ ! -d "./data/saved_models/mrda" ]; then 31 | mkdir "./data/saved_models/mrda" 32 | fi 33 | python -u train.py \ 34 | --task_name=mrda \ 35 | --use_cuda=true \ 36 | --do_train=true \ 37 | --in_tokens=true \ 38 | --epoch=2 \ 39 | --batch_size=4096 \ 40 | --data_dir=./data/input/data/mrda \ 41 | --bert_config_path=./data/pretrain_model/uncased_L-12_H-768_A-12/bert_config.json \ 42 | --vocab_path=./data/pretrain_model/uncased_L-12_H-768_A-12/vocab.txt \ 43 | --init_from_pretrain_model=./data/pretrain_model/uncased_L-12_H-768_A-12/params \ 44 | --save_model_path=./data/saved_models/mrda \ 45 | --save_param="params" \ 46 | --save_steps=500 \ 47 | --learning_rate=2e-5 \ 48 | --weight_decay=0.01 \ 49 | --max_seq_len=128 \ 50 | --print_steps=200 \ 51 | --use_fp16=false \ 52 | --enable_ce=store_true 53 | } 54 | 55 | # FIXME(zjl): this model would fail when GC is enabled, 56 | # but it seems that this error is from the model itself. 
--------------------------------------------------------------------------------
/DGU/.run_ce.sh:
--------------------------------------------------------------------------------
#!/bin/bash

train_atis_slot(){
    if [ ! -d "./data/saved_models/atis_slot" ]; then
        mkdir "./data/saved_models/atis_slot"
    fi
    python -u train.py \
        --task_name=atis_slot \
        --use_cuda=true \
        --do_train=true \
        --in_tokens=false \
        --epoch=2 \
        --batch_size=32 \
        --data_dir=./data/input/data/atis/atis_slot \
        --bert_config_path=./data/pretrain_model/uncased_L-12_H-768_A-12/bert_config.json \
        --vocab_path=./data/pretrain_model/uncased_L-12_H-768_A-12/vocab.txt \
        --init_from_pretrain_model=./data/pretrain_model/uncased_L-12_H-768_A-12/params \
        --save_model_path=./data/saved_models/atis_slot \
        --save_param="params" \
        --save_steps=100 \
        --learning_rate=2e-5 \
        --weight_decay=0.01 \
        --max_seq_len=128 \
        --print_steps=10 \
        --use_fp16=false \
        --enable_ce=store_true
}

train_mrda(){
    if [ ! -d "./data/saved_models/mrda" ]; then
        mkdir "./data/saved_models/mrda"
    fi
    python -u train.py \
        --task_name=mrda \
        --use_cuda=true \
        --do_train=true \
        --in_tokens=true \
        --epoch=2 \
        --batch_size=4096 \
        --data_dir=./data/input/data/mrda \
        --bert_config_path=./data/pretrain_model/uncased_L-12_H-768_A-12/bert_config.json \
        --vocab_path=./data/pretrain_model/uncased_L-12_H-768_A-12/vocab.txt \
        --init_from_pretrain_model=./data/pretrain_model/uncased_L-12_H-768_A-12/params \
        --save_model_path=./data/saved_models/mrda \
        --save_param="params" \
        --save_steps=500 \
        --learning_rate=2e-5 \
        --weight_decay=0.01 \
        --max_seq_len=128 \
        --print_steps=200 \
        --use_fp16=false \
        --enable_ce=store_true
}

# FIXME(zjl): this model would fail when GC is enabled,
# but it seems that this error is from the model itself.
# See issue here: https://github.com/PaddlePaddle/Paddle/issues/18994#event-2532039900
# To fix ce, disable gc in this model temporarily.
export FLAGS_eager_delete_tensor_gb=1

cudaid=${multi:=0,1,2,3}
export CUDA_VISIBLE_DEVICES=$cudaid
train_atis_slot | python _ce.py
sleep 20

cudaid=${single:=0}
export CUDA_VISIBLE_DEVICES=$cudaid
train_atis_slot | python _ce.py

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
[MIT License](https://opensource.org/licenses/MIT)

# New record achieved by ERNIE_English (2019/06/13)
We achieved a new best R10@1 score (**85.67%**) on the Ubuntu Corpus by incorporating ERNIE_English, an English pre-trained model from Baidu.
Please refer to DMTK (the Dialogue Modeling ToolKit) for more details.
https://github.com/PaddlePaddle/models/tree/develop/PaddleNLP/PaddleDialogue


# Baidu NLP Dialogue team
The dialogue team at Baidu NLP is a group of engineers and researchers who truly trust in technology and work together to accelerate the development of open-domain dialogue.

Our battlefields include, but are not limited to, the fundamental technology of neural dialogue systems (seq2seq generation and context-response matching), knowledge-driven dialogue, lifelong-learning dialogue systems with reinforcement learning, and system-level solutions for open-domain chatbots.

Together we built the largest Chinese human-computer conversation systems and support businesses such as DuerOS, the largest chatbot platform in China. Our lifelong-learning system interacts with hundreds of millions of Chinese users every day, learns through imitation and user feedback, distills knowledge from conversations, and keeps learning to be smarter.

We will release source code from our previous work over time, as a small contribution to the whole community of human-computer conversation.

# Publication
> + [Proactive Human-Machine Conversation with Explicit Conversation Goals](https://arxiv.org/abs/1906.05572). *ACL 2019, Full Paper, poster*
> + [Multi-Turn Response Selection for Chatbots with Deep Attention Matching Network](http://aclweb.org/anthology/P18-1103). *ACL 2018, Full Paper, oral*
> + [Multi-View Response Selection for Human-Computer Conversation](http://www.aclweb.org/anthology/D16-1036). *EMNLP 2016, Full Paper, poster*
> + [Shall I be Your Chat Companion towards an Online Human-Computer Conversation System](http://research.baidu.com/Public/uploads/5acc2a6723f1d.pdf). *CIKM 2016, Full Paper, oral*

# Connect to our Chatbot Service
Any Chinese developer can enable their smart devices to talk with customers on open-domain topics by using our open chatbot service. Please find the usage manual at http://ai.baidu.com/forum/topic/show/497679 (in Chinese).

--------------------------------------------------------------------------------
/ADE/_ce.py:
--------------------------------------------------------------------------------
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""this file is only used for continuous evaluation test!"""

import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi
from kpi import DurationKpi

train_loss_card1 = CostKpi('train_loss_card1', 0.03, 0, actived=True)
train_loss_card4 = CostKpi('train_loss_card4', 0.03, 0, actived=True)
train_duration_card1 = DurationKpi('train_duration_card1', 0.01, 0, actived=True)
train_duration_card4 = DurationKpi('train_duration_card4', 0.01, 0, actived=True)

tracking_kpis = [
    train_loss_card1,
    train_loss_card4,
    train_duration_card1,
    train_duration_card4,
]


def parse_log(log):
    '''
    This method should be implemented by model developers.

    The suggestion:

    each line in the log should be key, value, for example:

    "
    train_cost\t1.0
    test_cost\t1.0
    train_cost\t1.0
    train_cost\t1.0
    train_acc\t1.2
    "
    '''
    for line in log.split('\n'):
        fs = line.strip().split('\t')
        print(fs)
        if len(fs) == 3 and fs[0] == 'kpis':
            kpi_name = fs[1]
            kpi_value = float(fs[2])
            yield kpi_name, kpi_value


def log_to_ce(log):
    kpi_tracker = {}
    for kpi in tracking_kpis:
        kpi_tracker[kpi.name] = kpi

    for (kpi_name, kpi_value) in parse_log(log):
        print(kpi_name, kpi_value)
        kpi_tracker[kpi_name].add_record(kpi_value)
        kpi_tracker[kpi_name].persist()


if __name__ == '__main__':
    log = sys.stdin.read()
    log_to_ce(log)
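parse_log only picks up tab-separated lines of the form kpis<TAB>name<TAB>value and ignores everything else. A sketch on a hypothetical log; note that importing _ce assumes the CE environment is available (a `ceroot` env var pointing at the `kpi` module):

# Hypothetical training log; only the "kpis\t..." lines are yielded.
from _ce import parse_log  # assumes ceroot/kpi are importable

log = ("some banner line\n"
       "kpis\ttrain_loss_card1\t0.532\n"
       "kpis\ttrain_duration_card1\t12.4\n")
print(list(parse_log(log)))
# -> [('train_loss_card1', 0.532), ('train_duration_card1', 12.4)]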
--------------------------------------------------------------------------------
/ADE/ade/prepare_data_and_model.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import tarfile
import shutil
import urllib
import sys
import io
import os

URLLIB = urllib
if sys.version_info >= (3, 0):
    import urllib.request
    URLLIB = urllib.request

DATA_MODEL_PATH = {"DATA_PATH": "https://baidu-nlp.bj.bcebos.com/auto_dialogue_evaluation_dataset-1.0.0.tar.gz",
                   "TRAINED_MODEL": "https://baidu-nlp.bj.bcebos.com/auto_dialogue_evaluation_models.2.0.0.tar.gz"}

PATH_MAP = {'DATA_PATH': "./data/input",
            'TRAINED_MODEL': './data/saved_models'}


def un_tar(tar_name, dir_name):
    try:
        t = tarfile.open(tar_name)
        t.extractall(path=dir_name)
        return True
    except Exception as e:
        print(e)
        return False


def download_model_and_data():
    print("Downloading ade data, pretrain model and trained models......")
    print("This process is quite long, please wait patiently............")
    for path in ['./data/input/data', './data/saved_models/trained_models']:
        if not os.path.exists(path):
            continue
        shutil.rmtree(path)
    for path_key in DATA_MODEL_PATH:
        filename = os.path.basename(DATA_MODEL_PATH[path_key])
        URLLIB.urlretrieve(DATA_MODEL_PATH[path_key], os.path.join("./", filename))
        state = un_tar(filename, PATH_MAP[path_key])
        if not state:
            print("Tar %s error....." % path_key)
            return False
        os.remove(filename)
    return True


if __name__ == "__main__":
    state = download_model_and_data()
    if not state:
        exit(1)
    print("Downloaded data and models successfully......")

--------------------------------------------------------------------------------
/ADE/eval.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""evaluation metrics"""

import io
import os
import sys
import numpy as np

import ade.evaluate as evaluate
from ade.utils.configure import PDConfig


def do_eval(args):
    """evaluate metrics"""
    labels = []
    fr = io.open(args.evaluation_file, 'r', encoding="utf8")
    for line in fr:
        tokens = line.strip().split('\t')
        assert len(tokens) == 3
        label = int(tokens[2])
        labels.append(label)

    scores = []
    fr = io.open(args.output_prediction_file, 'r', encoding="utf8")
    for line in fr:
        tokens = line.strip().split('\t')
        assert len(tokens) == 2
        score = tokens[1].strip("[]").split()
        score = np.array(score)
        score = score.astype(np.float64)
        scores.append(score)

    if args.loss_type == 'CLS':
        recall_dict = evaluate.evaluate_Recall(list(zip(scores, labels)))
        mean_score = sum(scores) / len(scores)
        print('mean score: %.6f' % mean_score)
        print('evaluation recall result:')
        print('1_in_2: %.6f\t1_in_10: %.6f\t2_in_10: %.6f\t5_in_10: %.6f' %
              (recall_dict['1_in_2'], recall_dict['1_in_10'],
               recall_dict['2_in_10'], recall_dict['5_in_10']))
    elif args.loss_type == 'L2':
        scores = [x[0] for x in scores]
        mean_score = sum(scores) / len(scores)
        cor = evaluate.evaluate_cor(scores, labels)
        print('mean score: %.6f\nevaluation cor results: %.6f' %
              (mean_score, cor))
    else:
        raise ValueError


if __name__ == "__main__":
    args = PDConfig(yaml_file="./data/config/ade.yaml")
    args.build()

    do_eval(args)

--------------------------------------------------------------------------------
/DGU/dgu/prepare_data_and_model.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import tarfile
import shutil
import urllib
import sys
import io
import os


URLLIB = urllib
if sys.version_info >= (3, 0):
    import urllib.request
    URLLIB = urllib.request

DATA_MODEL_PATH = {"DATA_PATH": "https://baidu-nlp.bj.bcebos.com/dmtk_data_1.0.0.tar.gz",
                   "PRETRAIN_MODEL": "https://bert-models.bj.bcebos.com/uncased_L-12_H-768_A-12.tar.gz",
                   "TRAINED_MODEL": "https://baidu-nlp.bj.bcebos.com/dgu_models_2.0.0.tar.gz"}

PATH_MAP = {'DATA_PATH': "./data/input",
            'PRETRAIN_MODEL': './data/pretrain_model',
            'TRAINED_MODEL': './data/saved_models'}


def un_tar(tar_name, dir_name):
    try:
        t = tarfile.open(tar_name)
        t.extractall(path=dir_name)
        return True
    except Exception as e:
        print(e)
        return False


def download_model_and_data():
    print("Downloading dgu data, pretrain model and trained models......")
    print("This process is quite long, please wait patiently............")
    for path in ['./data/input/data', './data/pretrain_model/uncased_L-12_H-768_A-12', './data/saved_models/trained_models']:
        if not os.path.exists(path):
            continue
        shutil.rmtree(path)
    for path_key in DATA_MODEL_PATH:
        filename = os.path.basename(DATA_MODEL_PATH[path_key])
        URLLIB.urlretrieve(DATA_MODEL_PATH[path_key], os.path.join("./", filename))
        state = un_tar(filename, PATH_MAP[path_key])
        if not state:
            print("Tar %s error....." % path_key)
            return False
        os.remove(filename)
    return True


if __name__ == "__main__":
    state = download_model_and_data()
    if not state:
        exit(1)
    print("Downloaded data and models successfully......")

--------------------------------------------------------------------------------
/DGU/dgu/define_predict_pack.py:
--------------------------------------------------------------------------------
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""define prediction results"""

import re
import sys
import numpy as np

import paddle
import paddle.fluid as fluid


class DefinePredict(object):
    """
    Packaging Prediction Results
    """
    def __init__(self):
        """
        init
        """
        self.task_map = {'udc': 'get_matching_res',
                         'swda': 'get_cls_res',
                         'mrda': 'get_cls_res',
                         'atis_intent': 'get_cls_res',
                         'atis_slot': 'get_sequence_tagging',
                         'dstc2': 'get_multi_cls_res',
                         'dstc2_asr': 'get_multi_cls_res',
                         'multi-woz': 'get_multi_cls_res'}

    def get_matching_res(self, probs, params=None):
        """
        get matching score
        """
        probs = list(probs)
        return probs[1]

    def get_cls_res(self, probs, params=None):
        """
        get da classify tag
        """
        probs = list(probs)
        max_prob = max(probs)
        tag = probs.index(max_prob)
        return tag

    def get_sequence_tagging(self, probs, params=None):
        """
        get sequence tagging tag
        """
        labels = []
        batch_labels = np.array(probs).reshape(-1, params)
        labels = [" ".join([str(l) for l in list(l_l)]) for l_l in batch_labels]
        return labels

    def get_multi_cls_res(self, probs, params=None):
        """
        get dst classify tag
        """
        labels = []
        probs = list(probs)
        for i in range(len(probs)):
            if probs[i] >= 0.5:
                labels.append(i)
        if not labels:
            max_prob = max(probs)
            label_str = str(probs.index(max_prob))
        else:
            label_str = " ".join([str(l) for l in sorted(labels)])

        return label_str
14 | """this file is only used for continuous evaluation test!""" 15 | 16 | import os 17 | import sys 18 | sys.path.append(os.environ['ceroot']) 19 | from kpi import CostKpi 20 | from kpi import DurationKpi 21 | from kpi import AccKpi 22 | 23 | each_step_duration_atis_slot_card1 = DurationKpi('each_step_duration_atis_slot_card1', 0.01, 0, actived=True) 24 | train_loss_atis_slot_card1 = CostKpi('train_loss_atis_slot_card1', 0.08, 0, actived=True) 25 | train_acc_atis_slot_card1 = CostKpi('train_acc_atis_slot_card1', 0.01, 0, actived=True) 26 | each_step_duration_atis_slot_card4 = DurationKpi('each_step_duration_atis_slot_card4', 0.06, 0, actived=True) 27 | train_loss_atis_slot_card4 = CostKpi('train_loss_atis_slot_card4', 0.03, 0, actived=True) 28 | train_acc_atis_slot_card4 = CostKpi('train_acc_atis_slot_card4', 0.01, 0, actived=True) 29 | 30 | tracking_kpis = [ 31 | each_step_duration_atis_slot_card1, 32 | train_loss_atis_slot_card1, 33 | train_acc_atis_slot_card1, 34 | each_step_duration_atis_slot_card4, 35 | train_loss_atis_slot_card4, 36 | train_acc_atis_slot_card4, 37 | ] 38 | 39 | 40 | def parse_log(log): 41 | ''' 42 | This method should be implemented by model developers. 43 | 44 | The suggestion: 45 | 46 | each line in the log should be key, value, for example: 47 | 48 | " 49 | train_cost\t1.0 50 | test_cost\t1.0 51 | train_cost\t1.0 52 | train_cost\t1.0 53 | train_acc\t1.2 54 | " 55 | ''' 56 | for line in log.split('\n'): 57 | fs = line.strip().split('\t') 58 | print(fs) 59 | if len(fs) == 3 and fs[0] == 'kpis': 60 | kpi_name = fs[1] 61 | kpi_value = float(fs[2]) 62 | yield kpi_name, kpi_value 63 | 64 | 65 | def log_to_ce(log): 66 | kpi_tracker = {} 67 | for kpi in tracking_kpis: 68 | kpi_tracker[kpi.name] = kpi 69 | 70 | for (kpi_name, kpi_value) in parse_log(log): 71 | print(kpi_name, kpi_value) 72 | kpi_tracker[kpi_name].add_record(kpi_value) 73 | kpi_tracker[kpi_name].persist() 74 | 75 | 76 | if __name__ == '__main__': 77 | log = sys.stdin.read() 78 | log_to_ce(log) 79 | -------------------------------------------------------------------------------- /DAM/utils/douban_evaluation.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | from sklearn.metrics import average_precision_score 4 | 5 | def mean_average_precision(sort_data): 6 | #to do 7 | count_1 = 0 8 | sum_precision = 0 9 | for index in range(len(sort_data)): 10 | if sort_data[index][1] == 1: 11 | count_1 += 1 12 | sum_precision += 1.0 * count_1 / (index+1) 13 | return sum_precision / count_1 14 | 15 | def mean_reciprocal_rank(sort_data): 16 | sort_lable = [s_d[1] for s_d in sort_data] 17 | assert 1 in sort_lable 18 | return 1.0 / (1 + sort_lable.index(1)) 19 | 20 | def precision_at_position_1(sort_data): 21 | if sort_data[0][1] == 1: 22 | return 1 23 | else: 24 | return 0 25 | 26 | def recall_at_position_k_in_10(sort_data, k): 27 | sort_lable = [s_d[1] for s_d in sort_data] 28 | select_lable = sort_lable[:k] 29 | return 1.0 * select_lable.count(1) / sort_lable.count(1) 30 | 31 | def evaluation_one_session(data): 32 | sort_data = sorted(data, key=lambda x: x[0], reverse=True) 33 | m_a_p = mean_average_precision(sort_data) 34 | m_r_r = mean_reciprocal_rank(sort_data) 35 | p_1 = precision_at_position_1(sort_data) 36 | r_1 = recall_at_position_k_in_10(sort_data, 1) 37 | r_2 = recall_at_position_k_in_10(sort_data, 2) 38 | r_5 = recall_at_position_k_in_10(sort_data, 5) 39 | return m_a_p, m_r_r, p_1, r_1, r_2, r_5 40 | 41 | def evaluate(file_path): 42 | 
42 |     sum_m_a_p = 0
43 |     sum_m_r_r = 0
44 |     sum_p_1 = 0
45 |     sum_r_1 = 0
46 |     sum_r_2 = 0
47 |     sum_r_5 = 0
48 | 
49 |     i = 0
50 |     total_num = 0
51 |     with open(file_path, 'r') as infile:
52 |         for line in infile:
53 |             if i % 10 == 0:
54 |                 data = []
55 | 
56 |             tokens = line.strip().split('\t')
57 |             data.append((float(tokens[0]), int(tokens[1])))
58 | 
59 |             if i % 10 == 9:
60 |                 total_num += 1
61 |                 m_a_p, m_r_r, p_1, r_1, r_2, r_5 = evaluation_one_session(data)
62 |                 sum_m_a_p += m_a_p
63 |                 sum_m_r_r += m_r_r
64 |                 sum_p_1 += p_1
65 |                 sum_r_1 += r_1
66 |                 sum_r_2 += r_2
67 |                 sum_r_5 += r_5
68 | 
69 |             i += 1
70 | 
71 |     #print('total num: %s' %total_num)
72 |     #print('MAP: %s' %(1.0*sum_m_a_p/total_num))
73 |     #print('MRR: %s' %(1.0*sum_m_r_r/total_num))
74 |     #print('P@1: %s' %(1.0*sum_p_1/total_num))
75 |     return (1.0*sum_m_a_p/total_num, 1.0*sum_m_r_r/total_num, 1.0*sum_p_1/total_num,
76 |             1.0*sum_r_1/total_num, 1.0*sum_r_2/total_num, 1.0*sum_r_5/total_num)
77 | 
78 | if __name__ == '__main__':
79 |     result = evaluate(sys.argv[1])
80 |     for r in result:
81 |         print(r)
82 | 
83 | 
84 | 
85 | 
86 | 
87 | 
88 | 
89 | 
90 | 
--------------------------------------------------------------------------------
/DAM/bin/test_and_evaluate.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 | import time
4 | 
5 | import cPickle as pickle
6 | import tensorflow as tf
7 | import numpy as np
8 | 
9 | import utils.reader as reader
10 | import utils.evaluation as eva
11 | 
12 | 
13 | def test(conf, _model):
14 | 
15 |     if not os.path.exists(conf['save_path']):
16 |         os.makedirs(conf['save_path'])
17 | 
18 |     # load data
19 |     print('starting loading data')
20 |     print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
21 |     train_data, val_data, test_data = pickle.load(open(conf["data_path"], 'rb'))
22 |     print('finish loading data')
23 | 
24 |     test_batches = reader.build_batches(test_data, conf)
25 | 
26 |     print("finish building test batches")
27 |     print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
28 | 
29 |     # refine conf
30 |     test_batch_num = len(test_batches["response"])
31 | 
32 |     print('configurations: %s' %conf)
33 | 
34 | 
35 |     _graph = _model.build_graph()
36 |     print('build graph success')
37 |     print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
38 | 
39 |     with tf.Session(graph=_graph) as sess:
40 |         #_model.init.run();
41 |         _model.saver.restore(sess, conf["init_model"])
42 |         print("success init %s" %conf["init_model"])
43 | 
44 |         batch_index = 0
45 |         step = 0
46 | 
47 |         score_file_path = conf['save_path'] + 'score.test'
48 |         score_file = open(score_file_path, 'w')
49 | 
50 |         print('starting test')
51 |         print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
52 |         for batch_index in xrange(test_batch_num):
53 | 
54 |             feed = {
55 |                 _model.turns: test_batches["turns"][batch_index],
56 |                 _model.tt_turns_len: test_batches["tt_turns_len"][batch_index],
57 |                 _model.every_turn_len: test_batches["every_turn_len"][batch_index],
58 |                 _model.response: test_batches["response"][batch_index],
59 |                 _model.response_len: test_batches["response_len"][batch_index],
60 |                 _model.label: test_batches["label"][batch_index]
61 |             }
62 | 
63 |             scores = sess.run(_model.logits, feed_dict = feed)
64 | 
65 |             for i in xrange(conf["batch_size"]):
66 |                 score_file.write(
67 |                     str(scores[i]) + '\t' +
68 |                     str(test_batches["label"][batch_index][i]) + '\n')
69 |                     #str(sum(test_batches["every_turn_len"][batch_index][i]) / test_batches['tt_turns_len'][batch_index][i]) + '\t' +
70 |                     #str(test_batches['tt_turns_len'][batch_index][i]) + '\n')
71 | 
72 |         score_file.close()
73 |         print('finish test')
74 |         print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
75 | 
76 | 
77 |     #write evaluation result
78 |     result = eva.evaluate(score_file_path)
79 |     result_file_path = conf["save_path"] + "result.test"
80 |     with open(result_file_path, 'w') as out_file:
81 |         for p_at in result:
82 |             out_file.write(str(p_at) + '\n')
83 |     print('finish evaluation')
84 |     print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
85 | 
86 | 
87 | 
88 | 
--------------------------------------------------------------------------------
/DAM/README.md:
--------------------------------------------------------------------------------
1 | [MIT License](https://opensource.org/licenses/MIT)
2 | 
3 | # __New Record__
4 | 
5 | We achieved a new best score of R_10@1 (85.67%) on the Ubuntu Corpus by incorporating ERNIE_English, an English pre-trained model from Baidu. Please refer to DMTK (the Dialogue Modeling ToolKit) for more details. https://github.com/PaddlePaddle/models/tree/develop/PaddleNLP/dialogue_model_toolkit
6 | 
7 | 
8 | 
9 | # __Deep Attention Matching Network__
10 | 
11 | This is the source code of the Deep Attention Matching network (DAM), proposed for multi-turn response selection in retrieval-based chatbots.
12 | 
13 | DAM is a neural matching network based entirely on the attention mechanism. The motivation of DAM is to capture semantic dependencies among dialogue elements at different levels of granularity in multi-turn conversation as matching evidence, in order to better match a response candidate with its multi-turn context. DAM appears at ACL-2018; please find our paper at: http://acl2018.org/conference/accepted-papers/.
14 | 
15 | ## __Paddle Version__
16 | 
17 | DAM was originally implemented with TensorFlow; we highly recommend using the Paddle version, as Paddle supports parallel training on very large corpora.
18 | 
19 | You can find the Paddle version at: https://github.com/PaddlePaddle/models/tree/develop/fluid .
20 | 
21 | ## __Network__
22 | 
23 | DAM is inspired by the Transformer in Machine Translation (Vaswani et al., 2017); we extend its key attention mechanism in two directions and introduce both kinds of attention in one uniform neural network (see the sketch after this list).
24 | 
25 | - **self-attention** gradually captures semantic representations at different granularities by stacking attention over word-level embeddings. These multi-grained semantic representations facilitate exploring segmental dependencies between context and response.
26 | 
27 | - **cross-attention** attends across context and response, and can generally capture the relevance in dependency between segment pairs, which provides information complementary to textual relevance for matching a response with its multi-turn context.
28 | 
29 | 
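To make the two modules concrete, here is a minimal NumPy sketch of the scaled dot-product attention both build on: self-attention takes Q, K, V from the same utterance, while cross-attention takes Q from one side and K, V from the other. This is an illustration only, not the repository's implementation; the `attend` helper is hypothetical.

```
import numpy as np

def attend(Q, K, V):
    """Scaled dot-product attention: softmax(Q K^T / sqrt(d)) V."""
    scores = np.dot(Q, K.T) / np.sqrt(Q.shape[-1])    # [len_q, len_k]
    w = np.exp(scores - scores.max(axis=-1, keepdims=True))
    w /= w.sum(axis=-1, keepdims=True)                # row-wise softmax
    return np.dot(w, V)                               # [len_q, d]

utterance = np.random.rand(5, 64)                     # 5 words, 64-dim embeddings
response = np.random.rand(7, 64)
self_rep = attend(utterance, utterance, utterance)    # self-attention
cross_rep = attend(utterance, response, response)     # cross-attention
```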
31 | 
32 | *(network overview figures)*
38 | 
39 | ## __Usage__
40 |
41 | First, please download [data](https://pan.baidu.com/s/1hakfuuwdS8xl7NyxlWzRiQ "data") and unzip it:
42 | ```
43 | cd data
44 | unzip data.zip
45 | ```
46 |
47 | If you want to use the well-trained models directly, please download [models](https://pan.baidu.com/s/1pl4d63MBxihgrEWWfdAz0w "models") and unzip them:
48 | ```
49 | cd output
50 | unzip output.zip
51 | ```
52 |
53 | Train and test the model by:
54 | ```
55 | sh run.sh
56 | ```
57 |
58 | ## __Dependencies__
59 |
60 | - Python >= 2.7.3
61 | - Tensorflow == 1.2.1
62 |
63 | ## __Citation__
64 |
65 | The following article describes DAM in detail. We recommend citing this article by default.
66 |
67 | ```
68 | @inproceedings{ ,
69 | title={Multi-Turn Response Selection for Chatbots with Deep Attention Matching Network},
70 |     author={Xiangyang Zhou and Lu Li and Daxiang Dong and Yi Liu and Ying Chen and Wayne Xin Zhao and Dianhai Yu and Hua Wu},
71 | booktitle={Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
72 | volume={1},
73 | pages={ -- },
74 | year={2018}
75 | }
76 | ```
77 |
78 |
79 |
--------------------------------------------------------------------------------
/ADE/inference_model.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """save inference model for auto dialogue evaluation"""
15 |
16 | import os
17 | import sys
18 | import six
19 | import numpy as np
20 | import time
21 | import paddle
22 | import paddle.fluid as fluid
23 |
24 | import ade.reader as reader
25 | from ade_net import create_net
26 |
27 | from ade.utils.configure import PDConfig
28 | from ade.utils.input_field import InputField
29 | from ade.utils.model_check import check_cuda
30 | import ade.utils.save_load_io as save_load_io
31 |
32 |
33 | def do_save_inference_model(args):
34 |
35 | test_prog = fluid.default_main_program()
36 | startup_prog = fluid.default_startup_program()
37 |
38 | with fluid.program_guard(test_prog, startup_prog):
39 | test_prog.random_seed = args.random_seed
40 | startup_prog.random_seed = args.random_seed
41 |
42 | with fluid.unique_name.guard():
43 |
44 | context_wordseq = fluid.data(
45 | name='context_wordseq', shape=[-1, 1], dtype='int64', lod_level=1)
46 | response_wordseq = fluid.data(
47 | name='response_wordseq', shape=[-1, 1], dtype='int64', lod_level=1)
48 | labels = fluid.data(
49 | name='labels', shape=[-1, 1], dtype='int64')
50 |
51 | input_inst = [context_wordseq, response_wordseq, labels]
52 | input_field = InputField(input_inst)
53 | data_reader = fluid.io.PyReader(feed_list=input_inst,
54 | capacity=4, iterable=False)
55 |
56 | logits = create_net(
57 | is_training=False,
58 | model_input=input_field,
59 | args=args
60 | )
61 |
62 | if args.use_cuda:
63 | place = fluid.CUDAPlace(0)
64 | else:
65 | place = fluid.CPUPlace()
66 |
67 | exe = fluid.Executor(place)
68 | exe.run(startup_prog)
69 |
70 | assert (args.init_from_params) or (args.init_from_pretrain_model)
71 |
72 | if args.init_from_params:
73 | save_load_io.init_from_params(args, exe, test_prog)
74 | elif args.init_from_pretrain_model:
75 | save_load_io.init_from_pretrain_model(args, exe, test_prog)
76 |
77 | # saving inference model
78 | fluid.io.save_inference_model(
79 | args.inference_model_dir,
80 | feeded_var_names=[
81 | input_field.context_wordseq.name,
82 | input_field.response_wordseq.name,
83 | ],
84 | target_vars=[
85 | logits,
86 | ],
87 | executor=exe,
88 | main_program=test_prog,
89 | model_filename="model.pdmodel",
90 | params_filename="params.pdparams")
91 |
92 | print("save inference model at %s" % (args.inference_model_dir))
93 |
94 |
95 | if __name__ == "__main__":
96 | args = PDConfig(yaml_file="./data/config/ade.yaml")
97 | args.build()
98 |
99 | check_cuda(args.use_cuda)
100 |
101 | do_save_inference_model(args)
102 |
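For reference, a minimal sketch (not part of this file) of how the inference model saved above could be loaded back with the Paddle 1.x fluid API; the directory below is an assumption matching the default layout in ade.yaml.

```
import paddle.fluid as fluid

exe = fluid.Executor(fluid.CPUPlace())

# filenames match those passed to save_inference_model above
infer_prog, feed_names, fetch_targets = fluid.io.load_inference_model(
    "./data/inference_models",            # assumed inference_model_dir
    exe,
    model_filename="model.pdmodel",
    params_filename="params.pdparams")

# feed_names == ['context_wordseq', 'response_wordseq']; feed LoDTensors
# for both and run infer_prog to fetch the matching logits.
```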
--------------------------------------------------------------------------------
/DGU/dgu/utils/fp16.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import print_function
16 | import paddle
17 | import paddle.fluid as fluid
18 |
19 |
20 | def cast_fp16_to_fp32(i, o, prog):
21 | prog.global_block().append_op(
22 | type="cast",
23 | inputs={"X": i},
24 | outputs={"Out": o},
25 | attrs={
26 | "in_dtype": fluid.core.VarDesc.VarType.FP16,
27 | "out_dtype": fluid.core.VarDesc.VarType.FP32
28 | })
29 |
30 |
31 | def cast_fp32_to_fp16(i, o, prog):
32 | prog.global_block().append_op(
33 | type="cast",
34 | inputs={"X": i},
35 | outputs={"Out": o},
36 | attrs={
37 | "in_dtype": fluid.core.VarDesc.VarType.FP32,
38 | "out_dtype": fluid.core.VarDesc.VarType.FP16
39 | })
40 |
41 |
42 | def copy_to_master_param(p, block):
43 | v = block.vars.get(p.name, None)
44 | if v is None:
45 | raise ValueError("no param name %s found!" % p.name)
46 | new_p = fluid.framework.Parameter(
47 | block=block,
48 | shape=v.shape,
49 | dtype=fluid.core.VarDesc.VarType.FP32,
50 | type=v.type,
51 | lod_level=v.lod_level,
52 | stop_gradient=p.stop_gradient,
53 | trainable=p.trainable,
54 | optimize_attr=p.optimize_attr,
55 | regularizer=p.regularizer,
56 | gradient_clip_attr=p.gradient_clip_attr,
57 | error_clip=p.error_clip,
58 | name=v.name + ".master")
59 | return new_p
60 |
61 |
62 | def create_master_params_grads(params_grads, main_prog, startup_prog,
63 | loss_scaling):
64 | master_params_grads = []
65 | tmp_role = main_prog._current_role
66 | OpRole = fluid.core.op_proto_and_checker_maker.OpRole
67 | main_prog._current_role = OpRole.Backward
68 | for p, g in params_grads:
69 | # create master parameters
70 | master_param = copy_to_master_param(p, main_prog.global_block())
71 | startup_master_param = startup_prog.global_block()._clone_variable(
72 | master_param)
73 | startup_p = startup_prog.global_block().var(p.name)
74 | cast_fp16_to_fp32(startup_p, startup_master_param, startup_prog)
75 | # cast fp16 gradients to fp32 before apply gradients
76 | if g.name.find("layer_norm") > -1:
77 | if loss_scaling > 1:
78 | scaled_g = g / float(loss_scaling)
79 | else:
80 | scaled_g = g
81 | master_params_grads.append([p, scaled_g])
82 | continue
83 | master_grad = fluid.layers.cast(g, "float32")
84 | if loss_scaling > 1:
85 | master_grad = master_grad / float(loss_scaling)
86 | master_params_grads.append([master_param, master_grad])
87 | main_prog._current_role = tmp_role
88 | return master_params_grads
89 |
90 |
91 | def master_param_to_train_param(master_params_grads, params_grads, main_prog):
92 | for idx, m_p_g in enumerate(master_params_grads):
93 | train_p, _ = params_grads[idx]
94 | if train_p.name.find("layer_norm") > -1:
95 | continue
96 | with main_prog._optimized_guard([m_p_g[0], m_p_g[1]]):
97 | cast_fp32_to_fp16(m_p_g[0], train_p, main_prog)
98 |
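The reason for keeping fp32 "master" copies of fp16 parameters (what copy_to_master_param sets up above) shows in a few runnable NumPy lines; an illustration only, not repo code: small updates underflow in half precision but survive in the fp32 master weight.

```
import numpy as np

w16 = np.float16(1.0)              # fp16 training weight
update = np.float16(1e-4)          # a typical small step
print(w16 + update)                # 1.0   -> update lost to fp16 rounding
w32 = np.float32(w16)              # fp32 master copy
print(w32 + np.float32(update))    # 1.0001 -> preserved; cast back each step
```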
--------------------------------------------------------------------------------
/ADE/ade/utils/save_load_io.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """save or load model api"""
15 |
16 | import os
17 | import sys
18 |
19 | import paddle
20 | import paddle.fluid as fluid
21 |
22 |
23 | def init_from_pretrain_model(args, exe, program):
24 |
25 | assert isinstance(args.init_from_pretrain_model, str)
26 |
27 | if not os.path.exists(args.init_from_pretrain_model):
28 | raise Warning("The pretrained params do not exist.")
29 | return False
30 |
31 | def existed_params(var):
32 | if not isinstance(var, fluid.framework.Parameter):
33 | return False
34 | return os.path.exists(
35 | os.path.join(args.init_from_pretrain_model, var.name))
36 |
37 | fluid.io.load_vars(
38 | exe,
39 | args.init_from_pretrain_model,
40 | main_program=program,
41 | predicate=existed_params)
42 |
43 | print("finish initing model from pretrained params from %s" %
44 | (args.init_from_pretrain_model))
45 |
46 | return True
47 |
48 |
49 | def init_from_checkpoint(args, exe, program):
50 |
51 | assert isinstance(args.init_from_checkpoint, str)
52 |
53 | if not os.path.exists(args.init_from_checkpoint):
54 | raise Warning("the checkpoint path does not exist.")
55 | return False
56 |
57 | fluid.io.load_persistables(
58 | executor=exe,
59 | dirname=args.init_from_checkpoint,
60 | main_program=program,
61 | filename="checkpoint.pdckpt")
62 |
63 | print("finish initing model from checkpoint from %s" %
64 | (args.init_from_checkpoint))
65 |
66 | return True
67 |
68 |
69 | def init_from_params(args, exe, program):
70 |
71 | assert isinstance(args.init_from_params, str)
72 |
73 | if not os.path.exists(args.init_from_params):
74 | raise Warning("the params path does not exist.")
75 | return False
76 |
77 | fluid.io.load_params(
78 | executor=exe,
79 | dirname=args.init_from_params,
80 | main_program=program,
81 | filename="params.pdparams")
82 |
83 | print("finish init model from params from %s" % (args.init_from_params))
84 |
85 | return True
86 |
87 |
88 | def save_checkpoint(args, exe, program, dirname):
89 |
90 | assert isinstance(args.save_model_path, str)
91 |
92 | checkpoint_dir = os.path.join(args.save_model_path, args.save_checkpoint)
93 |
94 | if not os.path.exists(checkpoint_dir):
95 |         os.makedirs(checkpoint_dir)
96 |
97 | fluid.io.save_persistables(
98 | exe,
99 | os.path.join(checkpoint_dir, dirname),
100 | main_program=program,
101 | filename="checkpoint.pdckpt")
102 |
103 | print("save checkpoint at %s" % (os.path.join(checkpoint_dir, dirname)))
104 |
105 | return True
106 |
107 |
108 | def save_param(args, exe, program, dirname):
109 |
110 | assert isinstance(args.save_model_path, str)
111 |
112 | param_dir = os.path.join(args.save_model_path, args.save_param)
113 |
114 | if not os.path.exists(param_dir):
115 | os.makedirs(param_dir)
116 |
117 | fluid.io.save_params(
118 | exe,
119 | os.path.join(param_dir, dirname),
120 | main_program=program,
121 | filename="params.pdparams")
122 | print("save parameters at %s" % (os.path.join(param_dir, dirname)))
123 |
124 | return True
125 |
126 |
127 |
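A short sketch of how these helpers pair up (assumptions: Paddle 1.x, a built program, and an `args` namespace carrying the same fields the functions read; the paths are illustrative only):

```
import argparse
import paddle.fluid as fluid
import ade.utils.save_load_io as save_load_io

args = argparse.Namespace(
    save_model_path="./data/saved_models",                        # assumed layout
    save_checkpoint="checkpoints",
    init_from_checkpoint="./data/saved_models/checkpoints/final")

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
main_prog = fluid.default_main_program()

save_load_io.save_checkpoint(args, exe, main_prog, "final")       # writes checkpoint.pdckpt
save_load_io.init_from_checkpoint(args, exe, main_prog)           # restores it
```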
--------------------------------------------------------------------------------
/DGU/dgu/utils/save_load_io.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """save or load model api"""
15 |
16 | import os
17 | import sys
18 |
19 | import paddle
20 | import paddle.fluid as fluid
21 |
22 |
23 | def init_from_pretrain_model(args, exe, program):
24 |
25 | assert isinstance(args.init_from_pretrain_model, str)
26 |
27 | if not os.path.exists(args.init_from_pretrain_model):
28 |         print("Warning: the pretrained params do not exist.")
29 |         return False
30 |
31 | def existed_params(var):
32 | if not isinstance(var, fluid.framework.Parameter):
33 | return False
34 | return os.path.exists(
35 | os.path.join(args.init_from_pretrain_model, var.name))
36 |
37 | fluid.io.load_vars(
38 | exe,
39 | args.init_from_pretrain_model,
40 | main_program=program,
41 | predicate=existed_params)
42 |
43 | print("finish initing model from pretrained params from %s" %
44 | (args.init_from_pretrain_model))
45 |
46 | return True
47 |
48 |
49 | def init_from_checkpoint(args, exe, program):
50 |
51 | assert isinstance(args.init_from_checkpoint, str)
52 |
53 | if not os.path.exists(args.init_from_checkpoint):
54 |         print("Warning: the checkpoint path does not exist.")
55 |         return False
56 |
57 | fluid.io.load_persistables(
58 | executor=exe,
59 | dirname=args.init_from_checkpoint,
60 | main_program=program,
61 | filename="checkpoint.pdckpt")
62 |
63 | print("finish initing model from checkpoint from %s" %
64 | (args.init_from_checkpoint))
65 |
66 | return True
67 |
68 |
69 | def init_from_params(args, exe, program):
70 |
71 | assert isinstance(args.init_from_params, str)
72 |
73 | if not os.path.exists(args.init_from_params):
74 |         print("Warning: the params path does not exist.")
75 |         return False
76 |
77 | fluid.io.load_params(
78 | executor=exe,
79 | dirname=args.init_from_params,
80 | main_program=program,
81 | filename="params.pdparams")
82 |
83 | print("finish init model from params from %s" % (args.init_from_params))
84 |
85 | return True
86 |
87 |
88 | def save_checkpoint(args, exe, program, dirname):
89 |
90 | assert isinstance(args.save_model_path, str)
91 |
92 | checkpoint_dir = os.path.join(args.save_model_path, args.save_checkpoint)
93 |
94 | if not os.path.exists(checkpoint_dir):
95 |         os.makedirs(checkpoint_dir)
96 |
97 | fluid.io.save_persistables(
98 | exe,
99 | os.path.join(checkpoint_dir, dirname),
100 | main_program=program,
101 | filename="checkpoint.pdckpt")
102 |
103 | print("save checkpoint at %s" % (os.path.join(checkpoint_dir, dirname)))
104 |
105 | return True
106 |
107 |
108 | def save_param(args, exe, program, dirname):
109 |
110 | assert isinstance(args.save_model_path, str)
111 |
112 | param_dir = os.path.join(args.save_model_path, args.save_param)
113 |
114 | if not os.path.exists(param_dir):
115 | os.makedirs(param_dir)
116 |
117 | fluid.io.save_params(
118 | exe,
119 | os.path.join(param_dir, dirname),
120 | main_program=program,
121 | filename="params.pdparams")
122 | print("save parameters at %s" % (os.path.join(param_dir, dirname)))
123 |
124 | return True
125 |
126 |
127 |
--------------------------------------------------------------------------------
/ADE/ade_net.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Network for auto dialogue evaluation"""
15 |
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 |
20 | import paddle
21 | import paddle.fluid as fluid
22 |
23 |
24 | def create_net(
25 | is_training,
26 | model_input,
27 | args,
28 | clip_value=10.0,
29 | word_emb_name="shared_word_emb",
30 | lstm_W_name="shared_lstm_W",
31 | lstm_bias_name="shared_lstm_bias"):
32 |
33 | context_wordseq = model_input.context_wordseq
34 | response_wordseq = model_input.response_wordseq
35 | label = model_input.labels
36 |
37 | #emb
38 | context_emb = fluid.input.embedding(
39 | input=context_wordseq,
40 | size=[args.vocab_size, args.emb_size],
41 | is_sparse=True,
42 | param_attr=fluid.ParamAttr(
43 | name=word_emb_name,
44 | initializer=fluid.initializer.Normal(scale=0.1)))
45 |
46 | response_emb = fluid.input.embedding(
47 | input=response_wordseq,
48 | size=[args.vocab_size, args.emb_size],
49 | is_sparse=True,
50 | param_attr=fluid.ParamAttr(
51 | name=word_emb_name,
52 | initializer=fluid.initializer.Normal(scale=0.1)))
53 |
54 | #fc to fit dynamic LSTM
55 | context_fc = fluid.layers.fc(
56 | input=context_emb,
57 | size=args.hidden_size * 4,
58 | param_attr=fluid.ParamAttr(name='fc_weight'),
59 | bias_attr=fluid.ParamAttr(name='fc_bias'))
60 |
61 | response_fc = fluid.layers.fc(
62 | input=response_emb,
63 | size=args.hidden_size * 4,
64 | param_attr=fluid.ParamAttr(name='fc_weight'),
65 | bias_attr=fluid.ParamAttr(name='fc_bias'))
66 |
67 | #LSTM
68 | context_rep, _ = fluid.layers.dynamic_lstm(
69 | input=context_fc,
70 | size=args.hidden_size * 4,
71 | param_attr=fluid.ParamAttr(name=lstm_W_name),
72 | bias_attr=fluid.ParamAttr(name=lstm_bias_name))
73 | context_rep = fluid.layers.sequence_last_step(context_rep)
74 |
75 | response_rep, _ = fluid.layers.dynamic_lstm(
76 | input=response_fc,
77 | size=args.hidden_size * 4,
78 | param_attr=fluid.ParamAttr(name=lstm_W_name),
79 | bias_attr=fluid.ParamAttr(name=lstm_bias_name))
80 | response_rep = fluid.layers.sequence_last_step(input=response_rep)
81 |
82 | logits = fluid.layers.bilinear_tensor_product(
83 | context_rep, response_rep, size=1)
84 |
85 | if args.loss_type == 'CLS':
86 | label = fluid.layers.cast(x=label, dtype='float32')
87 | loss = fluid.layers.sigmoid_cross_entropy_with_logits(logits, label)
88 | loss = fluid.layers.reduce_mean(
89 | fluid.layers.clip(
90 | loss, min=-clip_value, max=clip_value))
91 | elif args.loss_type == 'L2':
92 | norm_score = 2 * fluid.layers.sigmoid(logits)
93 | label = fluid.layers.cast(x=label, dtype='float32')
94 | loss = fluid.layers.square_error_cost(norm_score, label) / 4
95 | loss = fluid.layers.reduce_mean(loss)
96 | else:
97 |         raise ValueError("loss_type should be 'CLS' or 'L2'")
98 |
99 | if is_training:
100 | return loss
101 | else:
102 | return logits
103 |
104 |
105 | def set_word_embedding(word_emb, place, word_emb_name="shared_word_emb"):
106 | """
107 | Set word embedding
108 | """
109 | word_emb_param = fluid.global_scope().find_var(
110 | word_emb_name).get_tensor()
111 | word_emb_param.set(word_emb, place)
112 |
113 |
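The two loss modes above interpret the bilinear logit differently: 'CLS' treats it as a binary classification logit, while 'L2' first squashes it into a (0, 2) score via 2 * sigmoid(logits), so dividing the squared error against a {0, 1} label by 4 keeps the loss in [0, 1]. A runnable NumPy illustration (not repo code):

```
import numpy as np

logit = np.array([-3.0, 0.0, 3.0])
label = np.array([0.0, 1.0, 1.0])
norm_score = 2.0 / (1.0 + np.exp(-logit))     # 2*sigmoid(logit), in (0, 2)
l2_loss = (norm_score - label) ** 2 / 4.0     # max |diff| is 2, so /4 bounds the loss by 1
print(norm_score.round(3), l2_loss.round(3))
```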
--------------------------------------------------------------------------------
/ADE/predict.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """predict auto dialogue evaluation task"""
16 | import io
17 | import os
18 | import sys
19 | import six
20 | import time
21 | import numpy as np
22 |
23 | import paddle
24 | import paddle.fluid as fluid
25 |
26 | import ade.reader as reader
27 | from ade_net import create_net
28 |
29 | from ade.utils.configure import PDConfig
30 | from ade.utils.input_field import InputField
31 | from ade.utils.model_check import check_cuda
32 | import ade.utils.save_load_io as save_load_io
33 |
34 |
35 | def do_predict(args):
36 | """
37 | predict function
38 | """
39 | test_prog = fluid.default_main_program()
40 | startup_prog = fluid.default_startup_program()
41 |
42 | with fluid.program_guard(test_prog, startup_prog):
43 | test_prog.random_seed = args.random_seed
44 | startup_prog.random_seed = args.random_seed
45 |
46 | with fluid.unique_name.guard():
47 |
48 | context_wordseq = fluid.data(
49 | name='context_wordseq', shape=[-1, 1], dtype='int64', lod_level=1)
50 | response_wordseq = fluid.data(
51 | name='response_wordseq', shape=[-1, 1], dtype='int64', lod_level=1)
52 | labels = fluid.data(
53 | name='labels', shape=[-1, 1], dtype='int64')
54 |
55 | input_inst = [context_wordseq, response_wordseq, labels]
56 | input_field = InputField(input_inst)
57 | data_reader = fluid.io.PyReader(feed_list=input_inst,
58 | capacity=4, iterable=False)
59 |
60 | logits = create_net(
61 | is_training=False,
62 | model_input=input_field,
63 | args=args
64 | )
65 | logits.persistable = True
66 |
67 | fetch_list = [logits.name]
68 |     # for_test=True changes the is_test attribute of operators to True
69 | test_prog = test_prog.clone(for_test=True)
70 | if args.use_cuda:
71 | place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
72 | else:
73 | place = fluid.CPUPlace()
74 |
75 | exe = fluid.Executor(place)
76 | exe.run(startup_prog)
77 |
78 | assert (args.init_from_params) or (args.init_from_pretrain_model)
79 | if args.init_from_params:
80 | save_load_io.init_from_params(args, exe, test_prog)
81 | if args.init_from_pretrain_model:
82 | save_load_io.init_from_pretrain_model(args, exe, test_prog)
83 |
84 | compiled_test_prog = fluid.CompiledProgram(test_prog)
85 |
86 | processor = reader.DataProcessor(
87 | data_path=args.predict_file,
88 | max_seq_length=args.max_seq_len,
89 | batch_size=args.batch_size)
90 |
91 | batch_generator = processor.data_generator(
92 | place=place,
93 | phase="test",
94 | shuffle=False,
95 | sample_pro=1)
96 | num_test_examples = processor.get_num_examples(phase='test')
97 |
98 | data_reader.decorate_batch_generator(batch_generator)
99 | data_reader.start()
100 |
101 | scores = []
102 | while True:
103 | try:
104 | results = exe.run(compiled_test_prog, fetch_list=fetch_list)
105 | scores.extend(results[0])
106 | except fluid.core.EOFException:
107 | data_reader.reset()
108 | break
109 |
110 | scores = scores[: num_test_examples]
111 | print("Write the predicted results into the output_prediction_file")
112 | fw = io.open(args.output_prediction_file, 'w', encoding="utf8")
113 | for index, score in enumerate(scores):
114 | fw.write("%s\t%s\n" % (index, score))
115 | print("finish........................................")
116 |
117 |
118 | if __name__ == "__main__":
119 |
120 | args = PDConfig(yaml_file="./data/config/ade.yaml")
121 | args.build()
122 | args.Print()
123 |
124 | check_cuda(args.use_cuda)
125 |
126 | do_predict(args)
127 |
--------------------------------------------------------------------------------
/DGU/inference_model.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """save inference model"""
15 |
16 | import os
17 | import sys
18 | import argparse
19 | import collections
20 | import numpy as np
21 |
22 | import paddle
23 | import paddle.fluid as fluid
24 |
25 | from dgu.utils.configure import PDConfig
26 | from dgu.utils.input_field import InputField
27 | from dgu.utils.model_check import check_cuda
28 | import dgu.utils.save_load_io as save_load_io
29 |
30 | import dgu.reader as reader
31 | from dgu_net import create_net
32 | import dgu.define_paradigm as define_paradigm
33 |
34 |
35 | def do_save_inference_model(args):
36 | """save inference model function"""
37 |
38 | task_name = args.task_name.lower()
39 | paradigm_inst = define_paradigm.Paradigm(task_name)
40 |
41 | processors = {
42 | 'udc': reader.UDCProcessor,
43 | 'swda': reader.SWDAProcessor,
44 | 'mrda': reader.MRDAProcessor,
45 | 'atis_slot': reader.ATISSlotProcessor,
46 | 'atis_intent': reader.ATISIntentProcessor,
47 | 'dstc2': reader.DSTC2Processor,
48 | }
49 |
50 | test_prog = fluid.default_main_program()
51 | startup_prog = fluid.default_startup_program()
52 |
53 | with fluid.program_guard(test_prog, startup_prog):
54 | test_prog.random_seed = args.random_seed
55 | startup_prog.random_seed = args.random_seed
56 |
57 | with fluid.unique_name.guard():
58 |
59 | # define inputs of the network
60 | num_labels = len(processors[task_name].get_labels())
61 |
62 | src_ids = fluid.data(
63 | name='src_ids', shape=[-1, args.max_seq_len], dtype='int64')
64 | pos_ids = fluid.data(
65 | name='pos_ids', shape=[-1, args.max_seq_len], dtype='int64')
66 | sent_ids = fluid.data(
67 | name='sent_ids', shape=[-1, args.max_seq_len], dtype='int64')
68 | input_mask = fluid.data(
69 | name='input_mask', shape=[-1, args.max_seq_len], dtype='float32')
70 | if args.task_name == 'atis_slot':
71 | labels = fluid.data(
72 | name='labels', shape=[-1, args.max_seq_len], dtype='int64')
73 | elif args.task_name in ['dstc2', 'dstc2_asr', 'multi-woz']:
74 | labels = fluid.data(
75 | name='labels', shape=[-1, num_labels], dtype='int64')
76 | else:
77 | labels = fluid.data(
78 | name='labels', shape=[-1, 1], dtype='int64')
79 |
80 | input_inst = [src_ids, pos_ids, sent_ids, input_mask, labels]
81 | input_field = InputField(input_inst)
82 |
83 | results = create_net(
84 | is_training=False,
85 | model_input=input_field,
86 | num_labels=num_labels,
87 | paradigm_inst=paradigm_inst,
88 | args=args)
89 | probs = results.get("probs", None)
90 |
91 | if args.use_cuda:
92 | place = fluid.CUDAPlace(0)
93 | else:
94 | place = fluid.CPUPlace()
95 |
96 | exe = fluid.Executor(place)
97 | exe.run(startup_prog)
98 |
99 | assert (args.init_from_params) or (args.init_from_pretrain_model)
100 |
101 | if args.init_from_params:
102 | save_load_io.init_from_params(args, exe, test_prog)
103 | elif args.init_from_pretrain_model:
104 | save_load_io.init_from_pretrain_model(args, exe, test_prog)
105 |
106 | # saving inference model
107 | fluid.io.save_inference_model(
108 | args.inference_model_dir,
109 | feeded_var_names=[
110 | input_field.src_ids.name,
111 | input_field.pos_ids.name,
112 | input_field.sent_ids.name,
113 | input_field.input_mask.name
114 | ],
115 | target_vars=[
116 | probs
117 | ],
118 | executor=exe,
119 | main_program=test_prog,
120 | model_filename="model.pdmodel",
121 | params_filename="params.pdparams")
122 |
123 | print("save inference model at %s" % (args.inference_model_dir))
124 |
125 |
126 | if __name__ == "__main__":
127 |
128 | args = PDConfig(yaml_file="./data/config/dgu.yaml")
129 | args.build()
130 |
131 | check_cuda(args.use_cuda)
132 |
133 | do_save_inference_model(args)
134 |
--------------------------------------------------------------------------------
/ADE/ade/reader.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """Reader for auto dialogue evaluation"""
16 |
17 | import io
18 | import sys
19 | import time
20 | import random
21 | import numpy as np
22 |
23 | import paddle
24 | import paddle.fluid as fluid
25 |
26 |
27 | class DataProcessor(object):
28 | def __init__(self, data_path, max_seq_length, batch_size):
29 | """init"""
30 | self.data_file = data_path
31 | self.max_seq_len = max_seq_length
32 | self.batch_size = batch_size
33 | self.num_examples = {'train': -1, 'dev': -1, 'test': -1}
34 |
35 | def get_examples(self):
36 | """load examples"""
37 | examples = []
38 | index = 0
39 | fr = io.open(self.data_file, 'r', encoding="utf8")
40 | for line in fr:
41 |             if index != 0 and index % 100 == 0:
42 | print("processing data: %d" % index)
43 | index += 1
44 | examples.append(line.strip())
45 | return examples
46 |
47 | def get_num_examples(self, phase):
48 | """Get number of examples for train, dev or test."""
49 | if phase not in ['train', 'dev', 'test']:
50 | raise ValueError(
51 | "Unknown phase, which should be in ['train', 'dev', 'test'].")
52 | count = len(io.open(self.data_file, 'r', encoding="utf8").readlines())
53 | self.num_examples[phase] = count
54 | return self.num_examples[phase]
55 |
56 | def data_generator(self,
57 | place,
58 | phase="train",
59 | shuffle=True,
60 | sample_pro=1):
61 | """
62 | Generate data for train, dev or test.
63 |
64 | Args:
65 | phase: string. The phase for which to generate data.
66 | shuffle: bool. Whether to shuffle examples.
67 | sample_pro: sample data ratio
68 | """
69 | examples = self.get_examples()
70 | if shuffle:
71 | np.random.shuffle(examples)
72 |
73 | def batch_reader():
74 | """read batch data"""
75 | batch = []
76 | for example in examples:
77 | if sample_pro < 1:
78 | if random.random() > sample_pro:
79 | continue
80 | tokens = example.strip().split('\t')
81 |
82 | if len(tokens) != 3:
83 | print("data format error: %s" % example.strip())
84 | print("please input data: context \t response \t label")
85 | continue
86 |
87 | context = [int(x) for x in tokens[0].split()[: self.max_seq_len]]
88 | response = [int(x) for x in tokens[1].split()[: self.max_seq_len]]
89 | label = [int(tokens[2])]
90 | instance = (context, response, label)
91 |
92 | if len(batch) < self.batch_size:
93 | batch.append(instance)
94 | else:
95 | if len(batch) == self.batch_size:
96 | yield batch
97 | batch = [instance]
98 |
99 | if len(batch) > 0:
100 | yield batch
101 |
102 | def create_lodtensor(data_ids, place):
103 | """create LodTensor for input ids"""
104 | cur_len = 0
105 | lod = [cur_len]
106 | seq_lens = [len(ids) for ids in data_ids]
107 | for l in seq_lens:
108 | cur_len += l
109 | lod.append(cur_len)
110 | flattened_data = np.concatenate(data_ids, axis=0).astype("int64")
111 | flattened_data = flattened_data.reshape([len(flattened_data), 1])
112 | res = fluid.LoDTensor()
113 | res.set(flattened_data, place)
114 | res.set_lod([lod])
115 | return res
116 |
117 | def wrapper():
118 | """yield batch data to network"""
119 | for batch_data in batch_reader():
120 | context_ids = [batch[0] for batch in batch_data]
121 | response_ids = [batch[1] for batch in batch_data]
122 | label_ids = [batch[2] for batch in batch_data]
123 | context_res = create_lodtensor(context_ids, place)
124 | response_res = create_lodtensor(response_ids, place)
125 | label_ids = np.array(label_ids).astype("int64").reshape([-1, 1])
126 | input_batch = [context_res, response_res, label_ids]
127 | yield input_batch
128 |
129 | return wrapper
130 |
131 |
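A worked example (illustration only) of the LoD index that create_lodtensor builds above: for two sequences of lengths 3 and 2, the flat data holds all ids and the LoD records cumulative offsets.

```
import numpy as np

data_ids = [[11, 12, 13], [21, 22]]     # two variable-length id sequences
lod = [0]
for ids in data_ids:
    lod.append(lod[-1] + len(ids))      # cumulative offsets
flat = np.concatenate(data_ids).astype("int64").reshape(-1, 1)
print(lod)            # [0, 3, 5] -> rows 0:3 are seq 1, rows 3:5 are seq 2
print(flat.ravel())   # [11 12 13 21 22]
```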
--------------------------------------------------------------------------------
/DGU/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | export FLAGS_sync_nccl_allreduce=0
4 | export FLAGS_eager_delete_tensor_gb=1
5 |
6 | export CUDA_VISIBLE_DEVICES=0
7 | if [ ! "$CUDA_VISIBLE_DEVICES" ]
8 | then
9 | export CPU_NUM=1
10 | use_cuda=false
11 | else
12 | use_cuda=true
13 | fi
14 |
15 | TASK_NAME=$1
16 | TASK_TYPE=$2
17 |
18 | BERT_BASE_PATH="./data/pretrain_model/uncased_L-12_H-768_A-12"
19 | INPUT_PATH="./data/input/data/${TASK_NAME}"
20 | SAVE_MODEL_PATH="./data/saved_models/${TASK_NAME}"
21 | TRAIN_MODEL_PATH="./data/saved_models/trained_models"
22 | OUTPUT_PATH="./data/output"
23 | INFERENCE_MODEL="data/inference_models"
24 | PYTHON_PATH="python"
25 |
26 | if [ -f ${SAVE_MODEL_PATH} ]; then
27 | rm ${SAVE_MODEL_PATH}
28 | fi
29 |
30 | if [ ! -d ${SAVE_MODEL_PATH} ]; then
31 | mkdir ${SAVE_MODEL_PATH}
32 | fi
33 |
34 | #parameter configuration
35 | if [ "${TASK_NAME}" = "udc" ]
36 | then
37 | save_steps=1000
38 | max_seq_len=210
39 | print_steps=1000
40 | batch_size=6720
41 | in_tokens=true
42 | epoch=2
43 | learning_rate=2e-5
44 | elif [ "${TASK_NAME}" = "swda" ]
45 | then
46 | save_steps=500
47 | max_seq_len=128
48 | print_steps=200
49 | batch_size=6720
50 | in_tokens=true
51 | epoch=3
52 | learning_rate=2e-5
53 | elif [ "${TASK_NAME}" = "mrda" ]
54 | then
55 | save_steps=500
56 | max_seq_len=128
57 | print_steps=200
58 | batch_size=4096
59 | in_tokens=true
60 | epoch=7
61 | learning_rate=2e-5
62 | elif [ "${TASK_NAME}" = "atis_intent" ]
63 | then
64 | save_steps=100
65 | max_seq_len=128
66 | print_steps=10
67 | batch_size=4096
68 | in_tokens=true
69 | epoch=20
70 | learning_rate=2e-5
71 | INPUT_PATH="./data/input/data/atis/${TASK_NAME}"
72 | elif [ "${TASK_NAME}" = "atis_slot" ]
73 | then
74 | save_steps=100
75 | max_seq_len=128
76 | print_steps=10
77 | batch_size=32
78 |     in_tokens=false
79 | epoch=50
80 | learning_rate=2e-5
81 | INPUT_PATH="./data/input/data/atis/${TASK_NAME}"
82 | elif [ "${TASK_NAME}" = "dstc2" ]
83 | then
84 | save_steps=400
85 | print_steps=20
86 | batch_size=8192
87 | in_tokens=true
88 | epoch=40
89 | learning_rate=5e-5
90 | INPUT_PATH="./data/input/data/dstc2/${TASK_NAME}"
91 | if [ "${TASK_TYPE}" = "train" ]
92 | then
93 | max_seq_len=256
94 | else
95 | max_seq_len=512
96 | fi
97 | else
98 | echo "not support ${TASK_NAME} dataset.."
99 | exit 255
100 | fi
101 |
102 | #training
103 | function train()
104 | {
105 | $PYTHON_PATH -u main.py \
106 | --task_name=${TASK_NAME} \
107 | --use_cuda=$1 \
108 | --do_train=true \
109 | --in_tokens=${in_tokens} \
110 | --epoch=${epoch} \
111 | --batch_size=${batch_size} \
112 | --do_lower_case=true \
113 | --data_dir=${INPUT_PATH} \
114 | --bert_config_path=${BERT_BASE_PATH}/bert_config.json \
115 | --vocab_path=${BERT_BASE_PATH}/vocab.txt \
116 | --init_from_pretrain_model=${BERT_BASE_PATH}/params \
117 | --save_model_path=${SAVE_MODEL_PATH} \
118 | --save_param="params" \
119 | --save_steps=${save_steps} \
120 | --learning_rate=${learning_rate} \
121 | --weight_decay=0.01 \
122 | --max_seq_len=${max_seq_len} \
123 | --print_steps=${print_steps};
124 | }
125 |
126 | #predicting
127 | function predict()
128 | {
129 | $PYTHON_PATH -u main.py \
130 | --task_name=${TASK_NAME} \
131 | --use_cuda=$1 \
132 | --do_predict=true \
133 | --in_tokens=${in_tokens} \
134 | --batch_size=${batch_size} \
135 | --data_dir=${INPUT_PATH} \
136 | --do_lower_case=true \
137 | --init_from_params=${TRAIN_MODEL_PATH}/${TASK_NAME}/params \
138 | --bert_config_path=${BERT_BASE_PATH}/bert_config.json \
139 | --vocab_path=${BERT_BASE_PATH}/vocab.txt \
140 | --output_prediction_file=${OUTPUT_PATH}/pred_${TASK_NAME} \
141 | --max_seq_len=${max_seq_len};
142 | }
143 |
144 | #evaluating
145 | function evaluate()
146 | {
147 | $PYTHON_PATH -u main.py \
148 | --task_name=${TASK_NAME} \
149 | --use_cuda=$1 \
150 | --do_eval=True \
151 | --evaluation_file=${INPUT_PATH}/test.txt \
152 | --output_prediction_file=${OUTPUT_PATH}/pred_${TASK_NAME};
153 | }
154 |
155 | #saving the inference model
156 | function save_inference()
157 | {
158 | $PYTHON_PATH -u main.py \
159 | --task_name=${TASK_NAME} \
160 | --use_cuda=$1 \
161 | --init_from_params=${TRAIN_MODEL_PATH}/${TASK_NAME}/params \
162 | --do_save_inference_model=True \
163 | --bert_config_path=${BERT_BASE_PATH}/bert_config.json \
164 | --inference_model_dir=${INFERENCE_MODEL}/${TASK_NAME};
165 | }
166 |
167 | if [ "${TASK_TYPE}" = "train" ]
168 | then
169 | echo "train $TASK_NAME start..........";
170 | train $use_cuda;
171 | echo ""train $TASK_NAME finish..........
172 | elif [ "${TASK_TYPE}" = "predict" ]
173 | then
174 | echo "predict $TASK_NAME start..........";
175 | predict $use_cuda;
176 | echo "predict $TASK_NAME finish..........";
177 | elif [ "${TASK_TYPE}" = "evaluate" ]
178 | then
179 | export CUDA_VISIBLE_DEVICES=
180 | echo "evaluate $TASK_NAME start..........";
181 | evaluate false;
182 | echo "evaluate $TASK_NAME finish..........";
183 | elif [ "${TASK_TYPE}" = "inference" ]
184 | then
185 | echo "save $TASK_NAME inference model start..........";
186 | save_inference $use_cuda;
187 | echo "save $TASK_NAME inference model finish..........";
188 | elif [ "${TASK_TYPE}" = "all" ]
189 | then
190 | echo "Execute train、predict、evaluate and save inference model in sequence...."
191 | train $use_cuda;
192 | predict $use_cuda;
193 | evaluate false;
194 | save_inference $use_cuda;
195 | echo "done";
196 | else
197 | echo "Parameter $TASK_TYPE is not supported, you can input parameter in [train|predict|evaluate|inference|all]"
198 | exit 255;
199 | fi
200 |
201 |
202 |
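For reference, run.sh takes the task name as its first argument and the task type as its second; assuming the data layout above is in place, typical invocations are:

```
sh run.sh atis_slot train      # fine-tune on ATIS slot filling
sh run.sh atis_slot predict    # write predictions under data/output
sh run.sh atis_slot all        # train, predict, evaluate, then save the inference model
```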
--------------------------------------------------------------------------------
/DGU/dgu/optimization.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Optimization and learning rate scheduling."""
15 |
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 |
20 | import numpy as np
21 | import paddle.fluid as fluid
22 | from dgu.utils.fp16 import create_master_params_grads, master_param_to_train_param
23 |
24 |
25 | def linear_warmup_decay(learning_rate, warmup_steps, num_train_steps):
26 | """ Applies linear warmup of learning rate from 0 and decay to 0."""
27 | with fluid.default_main_program()._lr_schedule_guard():
28 | lr = fluid.layers.tensor.create_global_var(
29 | shape=[1],
30 | value=0.0,
31 | dtype='float32',
32 | persistable=True,
33 | name="scheduled_learning_rate")
34 |
35 | global_step = fluid.layers.learning_rate_scheduler._decay_step_counter()
36 |
37 | with fluid.layers.control_flow.Switch() as switch:
38 | with switch.case(global_step < warmup_steps):
39 | warmup_lr = learning_rate * (global_step / warmup_steps)
40 | fluid.layers.tensor.assign(warmup_lr, lr)
41 | with switch.default():
42 | decayed_lr = fluid.layers.learning_rate_scheduler.polynomial_decay(
43 | learning_rate=learning_rate,
44 | decay_steps=num_train_steps,
45 | end_learning_rate=0.0,
46 | power=1.0,
47 | cycle=False)
48 | fluid.layers.tensor.assign(decayed_lr, lr)
49 |
50 | return lr
51 |
52 |
53 | def optimization(loss,
54 | warmup_steps,
55 | num_train_steps,
56 | learning_rate,
57 | train_program,
58 | startup_prog,
59 | weight_decay,
60 | scheduler='linear_warmup_decay',
61 | use_fp16=False,
62 | loss_scaling=1.0):
63 | if warmup_steps > 0:
64 | if scheduler == 'noam_decay':
65 | scheduled_lr = fluid.layers.learning_rate_scheduler\
66 | .noam_decay(1/(warmup_steps *(learning_rate ** 2)),
67 | warmup_steps)
68 | elif scheduler == 'linear_warmup_decay':
69 | scheduled_lr = linear_warmup_decay(learning_rate, warmup_steps,
70 | num_train_steps)
71 | else:
72 | raise ValueError("Unkown learning rate scheduler, should be "
73 | "'noam_decay' or 'linear_warmup_decay'")
74 | optimizer = fluid.optimizer.Adam(learning_rate=scheduled_lr)
75 | else:
76 | optimizer = fluid.optimizer.Adam(learning_rate=learning_rate)
77 | scheduled_lr = learning_rate
78 |
79 | clip_norm_thres = 1.0
80 | # When using mixed precision training, scale the gradient clip threshold
81 | # by loss_scaling
82 | if use_fp16 and loss_scaling > 1.0:
83 | clip_norm_thres *= loss_scaling
84 | fluid.clip.set_gradient_clip(
85 | clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=clip_norm_thres))
86 |
87 | def exclude_from_weight_decay(name):
88 | if name.find("layer_norm") > -1:
89 | return True
90 | bias_suffix = ["_bias", "_b", ".b_0"]
91 | for suffix in bias_suffix:
92 | if name.endswith(suffix):
93 | return True
94 | return False
95 |
96 | param_list = dict()
97 |
98 | if use_fp16:
99 | param_grads = optimizer.backward(loss)
100 | master_param_grads = create_master_params_grads(
101 | param_grads, train_program, startup_prog, loss_scaling)
102 |
103 | for param, _ in master_param_grads:
104 | param_list[param.name] = param * 1.0
105 | param_list[param.name].stop_gradient = True
106 |
107 | optimizer.apply_gradients(master_param_grads)
108 |
109 | if weight_decay > 0:
110 | for param, grad in master_param_grads:
111 | if exclude_from_weight_decay(param.name.rstrip(".master")):
112 | continue
113 | with param.block.program._optimized_guard(
114 | [param, grad]), fluid.framework.name_scope("weight_decay"):
115 | updated_param = param - param_list[
116 | param.name] * weight_decay * scheduled_lr
117 | fluid.layers.assign(output=param, input=updated_param)
118 |
119 | master_param_to_train_param(master_param_grads, param_grads,
120 | train_program)
121 |
122 | else:
123 | for param in train_program.global_block().all_parameters():
124 | param_list[param.name] = param * 1.0
125 | param_list[param.name].stop_gradient = True
126 |
127 | _, param_grads = optimizer.minimize(loss)
128 |
129 | if weight_decay > 0:
130 | for param, grad in param_grads:
131 | if exclude_from_weight_decay(param.name):
132 | continue
133 | with param.block.program._optimized_guard(
134 | [param, grad]), fluid.framework.name_scope("weight_decay"):
135 | updated_param = param - param_list[
136 | param.name] * weight_decay * scheduled_lr
137 | fluid.layers.assign(output=param, input=updated_param)
138 |
139 | return scheduled_lr
140 |
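A runnable sketch (illustration only; `scheduled_lr` is a hypothetical plain-Python mirror) of the schedule linear_warmup_decay builds above: a linear ramp from 0 to the base rate over warmup_steps, then a power-1 polynomial decay to 0 at num_train_steps.

```
def scheduled_lr(step, lr=2e-5, warmup_steps=100, num_train_steps=1000):
    if step < warmup_steps:
        return lr * step / float(warmup_steps)            # linear warmup branch
    frac = min(step, num_train_steps) / float(num_train_steps)
    return lr * (1.0 - frac)                              # polynomial decay, power=1

for step in (0, 50, 100, 550, 1000):
    print(step, scheduled_lr(step))
```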
--------------------------------------------------------------------------------
/DGU/dgu/scripts/build_mrda_dataset.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """build mrda train dev test dataset"""
16 |
17 | import sys
18 | import csv
19 | import os
20 | import io
21 | import re
22 |
23 | import commonlib
24 |
25 |
26 | class MRDA(object):
27 | """
28 |     MRDA dialogue act dataset processing
29 | """
30 | def __init__(self):
31 | """
32 | init instance
33 | """
34 | self.tag_id = 0
35 | self.map_tag_dict = dict()
36 | self.out_dir = "../../data/input/data/mrda"
37 | self.data_list = "./conf/mrda.conf"
38 | self.map_tag = "../../data/input/data/mrda/map_tag_id.txt"
39 | self.voc_map_tag = "../../data/input/data/mrda/source_data/icsi_mrda+hs_corpus_050512/classmaps/map_01b_expanded_w_split"
40 | self.src_dir = "../../data/input/data/mrda/source_data/icsi_mrda+hs_corpus_050512/data"
41 | self._load_file()
42 | self.tag_dict = commonlib.load_voc(self.voc_map_tag)
43 |
44 | def _load_file(self):
45 | """
46 | load dataset filename
47 | """
48 | self.dadb_dict = {}
49 | self.trans_dict = {}
50 | self.data_dict = commonlib.load_dict(self.data_list)
51 | file_list, file_path = commonlib.get_file_list(self.src_dir)
52 | for i in range(len(file_list)):
53 | name = file_list[i]
54 | keyword = name.split('.')[0]
55 | if 'dadb' in name:
56 | self.dadb_dict[keyword] = file_path[i]
57 | if 'trans' in name:
58 | self.trans_dict[keyword] = file_path[i]
59 |
60 | def load_dadb(self, data_type):
61 | """
62 | load dadb dataset
63 | """
64 | dadb_dict = {}
65 | conv_id_list = []
66 | dadb_list = self.data_dict[data_type]
67 | for dadb_key in dadb_list:
68 | dadb_file = self.dadb_dict[dadb_key]
69 | fr = io.open(dadb_file, 'r', encoding="utf8")
70 | row = csv.reader(fr, delimiter = ',')
71 | for line in row:
72 | elems = line
73 | conv_id = elems[2]
74 | conv_id_list.append(conv_id)
75 | if len(elems) != 14:
76 | continue
77 | error_code = elems[3]
78 | da_tag = elems[-9]
79 | da_ori_tag = elems[-6]
80 | dadb_dict[conv_id] = (error_code, da_ori_tag, da_tag)
81 | return dadb_dict, conv_id_list
82 |
83 | def load_trans(self, data_type):
84 | """load trans data"""
85 | trans_dict = {}
86 | trans_list = self.data_dict[data_type]
87 | for trans_key in trans_list:
88 | trans_file = self.trans_dict[trans_key]
89 | fr = io.open(trans_file, 'r', encoding="utf8")
90 | row = csv.reader(fr, delimiter = ',')
91 | for line in row:
92 | elems = line
93 | if len(elems) != 3:
94 | continue
95 | conv_id = elems[0]
96 | text = elems[1]
97 | text_process = elems[2]
98 | trans_dict[conv_id] = (text, text_process)
99 | return trans_dict
100 |
101 | def _parser_dataset(self, data_type):
102 | """
103 |         parse the train/dev/test dataset
104 | """
105 | out_filename = "%s/%s.txt" % (self.out_dir, data_type)
106 | dadb_dict, conv_id_list = self.load_dadb(data_type)
107 | trans_dict = self.load_trans(data_type)
108 | fw = io.open(out_filename, 'w', encoding="utf8")
109 | for elem in conv_id_list:
110 | v_dadb = dadb_dict[elem]
111 | v_trans = trans_dict[elem]
112 | da_tag = v_dadb[2]
113 | if da_tag not in self.tag_dict:
114 | continue
115 | tag = self.tag_dict[da_tag]
116 | if tag == "Z":
117 | continue
118 | if tag not in self.map_tag_dict:
119 | self.map_tag_dict[tag] = self.tag_id
120 | self.tag_id += 1
121 | caller = elem.split('_')[0].split('-')[-1]
122 | conv_no = elem.split('_')[0].split('-')[0]
123 | out = "%s\t%s\t%s\t%s" % (conv_no, self.map_tag_dict[tag], caller, v_trans[0])
124 | fw.write(u"%s\n" % out)
125 |
126 | def get_train_dataset(self):
127 | """
128 |         parse the train dataset and write train.txt
129 | """
130 | self._parser_dataset("train")
131 |
132 | def get_dev_dataset(self):
133 | """
134 |         parse the dev dataset and write dev.txt
135 | """
136 | self._parser_dataset("dev")
137 |
138 | def get_test_dataset(self):
139 | """
140 |         parse the test dataset and write test.txt
141 | """
142 | self._parser_dataset("test")
143 |
144 | def get_labels(self):
145 | """
146 | get tag and map ids file
147 | """
148 | fw = io.open(self.map_tag, 'w', encoding="utf8")
149 | for elem in self.map_tag_dict:
150 | fw.write(u"%s\t%s\n" % (elem, self.map_tag_dict[elem]))
151 |
152 | def main(self):
153 | """
154 | run data process
155 | """
156 | self.get_train_dataset()
157 | self.get_dev_dataset()
158 | self.get_test_dataset()
159 | self.get_labels()
160 |
161 | if __name__ == "__main__":
162 | mrda_inst = MRDA()
163 | mrda_inst.main()
164 |
165 |
166 |
167 |
168 |
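For reference, _parser_dataset writes one tab-separated example per line: conversation number, mapped tag id, caller, and the raw utterance text. A hypothetical row (illustrative values only, not real data):

```
Bdb001	3	c1	yeah that sounds right .
```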
--------------------------------------------------------------------------------
/DAM/utils/reader.py:
--------------------------------------------------------------------------------
1 | import cPickle as pickle
2 | import numpy as np
3 |
4 | def unison_shuffle(data, seed=None):
5 | if seed is not None:
6 | np.random.seed(seed)
7 |
8 | y = np.array(data['y'])
9 | c = np.array(data['c'])
10 | r = np.array(data['r'])
11 |
12 | assert len(y) == len(c) == len(r)
13 | p = np.random.permutation(len(y))
14 | shuffle_data = {'y': y[p], 'c': c[p], 'r': r[p]}
15 | return shuffle_data
16 |
17 | def split_c(c, split_id):
18 |     '''c is a list, e.g. an example context
19 |     split_id is an integer, conf[_EOS_]
20 | return nested list
21 | '''
22 | turns = [[]]
23 | for _id in c:
24 | if _id != split_id:
25 | turns[-1].append(_id)
26 | else:
27 | turns.append([])
28 | if turns[-1] == [] and len(turns) > 1:
29 | turns.pop()
30 | return turns
31 |
32 | def normalize_length(_list, length, cut_type='tail'):
33 |     '''_list is a list or nested list, e.g. turns / r / a single-turn c
34 |     cut_type is 'head' or 'tail'; it is applied when len(_list) > length
35 |     return a list of len=length and min(real_length, length)
36 | '''
37 | real_length = len(_list)
38 | if real_length == 0:
39 | return [0]*length, 0
40 |
41 | if real_length <= length:
42 | if not isinstance(_list[0], list):
43 | _list.extend([0]*(length - real_length))
44 | else:
45 | _list.extend([[]]*(length - real_length))
46 | return _list, real_length
47 |
48 | if cut_type == 'head':
49 | return _list[:length], length
50 | if cut_type == 'tail':
51 | return _list[-length:], length
52 |
53 | def produce_one_sample(data, index, split_id, max_turn_num, max_turn_len, turn_cut_type='tail', term_cut_type='tail'):
54 |     '''typical settings: max_turn_num=10,
55 |     max_turn_len=50;
56 | return y, nor_turns_nor_c, nor_r, turn_len, term_len, r_len
57 | '''
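    # pipeline: split the context into turns, pad/cut to max_turn_num turns,
    # then pad/cut every turn and the response to max_turn_len tokens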
58 | c = data['c'][index]
59 | r = data['r'][index][:]
60 | y = data['y'][index]
61 |
62 | turns = split_c(c, split_id)
63 |     #normalize the number of turns: len(nor_turns) == max_turn_num
64 | nor_turns, turn_len = normalize_length(turns, max_turn_num, turn_cut_type)
65 |
66 | nor_turns_nor_c = []
67 | term_len = []
68 |     #nor_turns_nor_c has length max_turn_num; each element is a list of length max_turn_len
69 | for c in nor_turns:
70 | #nor_c length is max_turn_len
71 | nor_c, nor_c_len = normalize_length(c, max_turn_len, term_cut_type)
72 | nor_turns_nor_c.append(nor_c)
73 | term_len.append(nor_c_len)
74 |
75 | nor_r, r_len = normalize_length(r, max_turn_len, term_cut_type)
76 |
77 | return y, nor_turns_nor_c, nor_r, turn_len, term_len, r_len
78 |
79 | def build_one_batch(data, batch_index, conf, turn_cut_type='tail', term_cut_type='tail'):
80 | _turns = []
81 | _tt_turns_len = []
82 | _every_turn_len = []
83 |
84 | _response = []
85 | _response_len = []
86 |
87 | _label = []
88 |
89 | for i in range(conf['batch_size']):
90 | index = batch_index * conf['batch_size'] + i
91 | y, nor_turns_nor_c, nor_r, turn_len, term_len, r_len = produce_one_sample(data, index, conf['_EOS_'], conf['max_turn_num'],
92 | conf['max_turn_len'], turn_cut_type, term_cut_type)
93 |
94 | _label.append(y)
95 | _turns.append(nor_turns_nor_c)
96 | _response.append(nor_r)
97 | _every_turn_len.append(term_len)
98 | _tt_turns_len.append(turn_len)
99 | _response_len.append(r_len)
100 |
101 | return _turns, _tt_turns_len, _every_turn_len, _response, _response_len, _label
102 |
103 | def build_one_batch_dict(data, batch_index, conf, turn_cut_type='tail', term_cut_type='tail'):
104 | _turns, _tt_turns_len, _every_turn_len, _response, _response_len, _label = build_one_batch(data, batch_index, conf, turn_cut_type, term_cut_type)
105 | ans = {'turns': _turns,
106 | 'tt_turns_len': _tt_turns_len,
107 | 'every_turn_len': _every_turn_len,
108 | 'response': _response,
109 | 'response_len': _response_len,
110 | 'label': _label}
111 | return ans
112 |
113 |
114 | def build_batches(data, conf, turn_cut_type='tail', term_cut_type='tail'):
115 | _turns_batches = []
116 | _tt_turns_len_batches = []
117 | _every_turn_len_batches = []
118 |
119 | _response_batches = []
120 | _response_len_batches = []
121 |
122 | _label_batches = []
123 |
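    # floor division: trailing examples that do not fill a whole batch are dropped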
124 | batch_len = int(len(data['y'])/conf['batch_size'])
125 | for batch_index in range(batch_len):
126 | _turns, _tt_turns_len, _every_turn_len, _response, _response_len, _label = build_one_batch(data, batch_index, conf, turn_cut_type='tail', term_cut_type='tail')
127 |
128 | _turns_batches.append(_turns)
129 | _tt_turns_len_batches.append(_tt_turns_len)
130 | _every_turn_len_batches.append(_every_turn_len)
131 |
132 | _response_batches.append(_response)
133 | _response_len_batches.append(_response_len)
134 |
135 | _label_batches.append(_label)
136 |
137 | ans = {
138 | "turns": _turns_batches, "tt_turns_len": _tt_turns_len_batches, "every_turn_len":_every_turn_len_batches,
139 | "response": _response_batches, "response_len": _response_len_batches, "label": _label_batches
140 | }
141 |
142 | return ans
143 |
144 | if __name__ == '__main__':
145 | conf = {
146 | "batch_size": 256,
147 | "max_turn_num": 10,
148 | "max_turn_len": 50,
149 | "_EOS_": 28270,
150 | }
151 | train, val, test = pickle.load(open('../../data/data_small.pkl', 'rb'))
152 | print('load data success')
153 |
154 | train_batches = build_batches(train, conf)
155 | val_batches = build_batches(val, conf)
156 | test_batches = build_batches(test, conf)
157 | print('build batches success')
158 |
159 | pickle.dump([train_batches, val_batches, test_batches], open('../../data/batches_small.pkl', 'wb'))
160 | print('dump success')
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
--------------------------------------------------------------------------------
/DGU/dgu/scripts/build_dstc2_dataset.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """build mrda train dev test dataset"""
16 |
17 | import json
18 | import sys
19 | import csv
20 | import os
21 | import io
22 | import re
23 |
24 | import commonlib
25 |
26 |
27 | class DSTC2(object):
28 | """
29 | dialogue state tracking dstc2 data process
30 | """
31 | def __init__(self):
32 | """
33 | init instance
34 | """
35 | self.map_tag_dict = {}
36 | self.out_dir = "../../data/input/data/dstc2/dstc2"
37 | self.out_asr_dir = "../../data/input/data/dstc2/dstc2_asr"
38 | self.data_list = "./conf/dstc2.conf"
39 | self.map_tag = "../../data/input/data/dstc2/dstc2/map_tag_id.txt"
40 | self.src_dir = "../../data/input/data/dstc2/source_data"
41 | self.onto_json = "../../data/input/data/dstc2/source_data/ontology_dstc2.json"
42 | self._load_file()
43 | self._load_ontology()
44 |
45 | def _load_file(self):
46 | """
47 | load dataset filename
48 | """
49 | self.data_dict = commonlib.load_dict(self.data_list)
50 | for data_type in self.data_dict:
51 | for i in range(len(self.data_dict[data_type])):
52 | self.data_dict[data_type][i] = os.path.join(self.src_dir, self.data_dict[data_type][i])
53 |
54 | def _load_ontology(self):
55 | """
56 | load ontology tag
57 | """
58 | tag_id = 1
59 | self.map_tag_dict['none'] = 0
60 | fr = io.open(self.onto_json, 'r', encoding="utf8")
61 | ontology = json.load(fr)
62 | slots_values = ontology['informable']
63 | for slot in slots_values:
64 | for value in slots_values[slot]:
65 | key = "%s_%s" % (slot, value)
66 | self.map_tag_dict[key] = tag_id
67 | tag_id += 1
68 | key = "%s_none" % (slot)
69 | self.map_tag_dict[key] = tag_id
70 | tag_id += 1
71 |
72 | def _parser_dataset(self, data_type):
73 | """
74 |         parse the train / dev / test datasets
75 | """
76 | stat = os.path.exists(self.out_dir)
77 | if not stat:
78 | os.makedirs(self.out_dir)
79 | asr_stat = os.path.exists(self.out_asr_dir)
80 | if not asr_stat:
81 | os.makedirs(self.out_asr_dir)
82 | out_file = os.path.join(self.out_dir, "%s.txt" % data_type)
83 | out_asr_file = os.path.join(self.out_asr_dir, "%s.txt" % data_type)
84 | fw = io.open(out_file, 'w', encoding="utf8")
85 | fw_asr = io.open(out_asr_file, 'w', encoding="utf8")
86 | data_list = self.data_dict.get(data_type)
87 | for fn in data_list:
88 | log_file = os.path.join(fn, "log.json")
89 | label_file = os.path.join(fn, "label.json")
90 | f_log = io.open(log_file, 'r', encoding="utf8")
91 | f_label = io.open(label_file, 'r', encoding="utf8")
92 | log_json = json.load(f_log)
93 | label_json = json.load(f_label)
94 | session_id = log_json['session-id']
95 | assert len(label_json["turns"]) == len(log_json["turns"])
96 | for i in range(len(label_json["turns"])):
97 | log_turn = log_json["turns"][i]
98 | label_turn = label_json["turns"][i]
99 | assert log_turn["turn-index"] == label_turn["turn-index"]
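                # map each goal label "slot_value" to its tag id; values absent
                # from the ontology fall back to the slot's "<slot>_none" id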
100 | labels = ["%s_%s" % (slot, label_turn["goal-labels"][slot]) for slot in label_turn["goal-labels"]]
101 | labels_ids = " ".join([str(self.map_tag_dict.get(label, self.map_tag_dict["%s_none" % label.split('_')[0]])) for label in labels])
102 | mach = log_turn['output']['transcript']
103 | user = label_turn['transcription']
104 | if not labels_ids.strip():
105 | labels_ids = self.map_tag_dict['none']
106 | out = "%s\t%s\1%s\t%s" % (session_id.encode('utf-8'), mach.encode('utf-8'), user.encode('utf-8'), labels_ids)
107 | user_asr = log_turn['input']['live']['asr-hyps'][0]['asr-hyp'].strip()
108 | out_asr = "%s\t%s\1%s\t%s" % (session_id.encode('utf-8'), mach.encode('utf-8'), user_asr.encode('utf-8'), labels_ids)
109 | fw.write(u"%s\n" % out)
110 | fw_asr.write(u"%s\n" % out_asr)
111 |
112 | def get_train_dataset(self):
113 | """
114 |         parse the train dataset and write train.txt
115 | """
116 | self._parser_dataset("train")
117 |
118 | def get_dev_dataset(self):
119 | """
120 |         parse the dev dataset and write dev.txt
121 | """
122 | self._parser_dataset("dev")
123 |
124 | def get_test_dataset(self):
125 | """
126 |         parse the test dataset and write test.txt
127 | """
128 | self._parser_dataset("test")
129 |
130 | def get_labels(self):
131 | """
132 |         write the tag-to-id mapping file
133 | """
134 | fw = io.open(self.map_tag, 'w', encoding="utf8")
135 | for elem in self.map_tag_dict:
136 | fw.write(u"%s\t%s\n" % (elem, self.map_tag_dict[elem]))
137 |
138 | def main(self):
139 | """
140 | run data process
141 | """
142 | self.get_train_dataset()
143 | self.get_dev_dataset()
144 | self.get_test_dataset()
145 | self.get_labels()
146 |
147 | if __name__ == "__main__":
148 | dstc_inst = DSTC2()
149 | dstc_inst.main()
150 |
151 |
152 |
153 |
154 |
--------------------------------------------------------------------------------
/DAM/bin/train_and_evaluate.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 | import time
4 |
5 | import cPickle as pickle
6 | import tensorflow as tf
7 | import numpy as np
8 |
9 | import utils.reader as reader
10 | import utils.evaluation as eva
11 |
12 |
13 | def train(conf, _model):
14 |
15 | if conf['rand_seed'] is not None:
16 | np.random.seed(conf['rand_seed'])
17 |
18 | if not os.path.exists(conf['save_path']):
19 | os.makedirs(conf['save_path'])
20 |
21 | # load data
22 |     print('start loading data')
23 | print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
24 | train_data, val_data, test_data = pickle.load(open(conf["data_path"], 'rb'))
25 | print('finish loading data')
26 | print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
27 |
28 | val_batches = reader.build_batches(val_data, conf)
29 |
30 | print("finish building test batches")
31 | print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
32 |
33 | # refine conf
34 | batch_num = int(len(train_data['y']) / conf["batch_size"])
35 | val_batch_num = len(val_batches["response"])
36 |
37 | conf["train_steps"] = conf["num_scan_data"] * batch_num
38 | conf["save_step"] = int(max(1, batch_num / 10))
39 | conf["print_step"] = int(max(1, batch_num / 100))
40 |
41 | print('configurations: %s' %conf)
42 |
43 |     print('model success')
44 | print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
45 |
46 | _graph = _model.build_graph()
47 |     print('build graph success')
48 | print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
49 |
50 | with tf.Session(graph=_graph) as sess:
51 | _model.init.run();
52 | if conf["init_model"]:
53 | _model.saver.restore(sess, conf["init_model"])
54 | print("sucess init %s" %conf["init_model"])
55 |
56 | average_loss = 0.0
57 | batch_index = 0
58 | step = 0
59 | best_result = [0, 0, 0, 0]
60 |
61 | for step_i in xrange(conf["num_scan_data"]):
62 | #for batch_index in rng.permutation(range(batch_num)):
63 |             print('start shuffling train data')
64 | shuffle_train = reader.unison_shuffle(train_data)
65 | train_batches = reader.build_batches(shuffle_train, conf)
66 |             print('finish building train batches')
67 | for batch_index in range(batch_num):
68 |
69 | feed = {
70 | _model.turns: train_batches["turns"][batch_index],
71 | _model.tt_turns_len: train_batches["tt_turns_len"][batch_index],
72 | _model.every_turn_len: train_batches["every_turn_len"][batch_index],
73 | _model.response: train_batches["response"][batch_index],
74 | _model.response_len: train_batches["response_len"][batch_index],
75 | _model.label: train_batches["label"][batch_index]
76 | }
77 |
78 |                 batch_index = (batch_index + 1) % batch_num  # redundant: the for loop reassigns batch_index each iteration
79 |
80 | _, curr_loss = sess.run([_model.g_updates, _model.loss], feed_dict = feed)
81 |
82 |
83 | average_loss += curr_loss
84 |
85 | step += 1
86 |
87 | if step % conf["print_step"] == 0 and step > 0:
88 | g_step, lr = sess.run([_model.global_step, _model.learning_rate])
89 | print('step: %s, lr: %s' %(g_step, lr))
90 | print("processed: [" + str(step * 1.0 / batch_num) + "] loss: [" + str(average_loss / conf["print_step"]) + "]" )
91 | average_loss = 0
92 |
93 |
94 | if step % conf["save_step"] == 0 and step > 0:
95 | index = step / conf['save_step']
96 | score_file_path = conf['save_path'] + 'score.' + str(index)
97 | score_file = open(score_file_path, 'w')
98 | print('save step: %s' %index)
99 | print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
100 |
101 | for batch_index in xrange(val_batch_num):
102 |
103 | feed = {
104 | _model.turns: val_batches["turns"][batch_index],
105 | _model.tt_turns_len: val_batches["tt_turns_len"][batch_index],
106 | _model.every_turn_len: val_batches["every_turn_len"][batch_index],
107 | _model.response: val_batches["response"][batch_index],
108 | _model.response_len: val_batches["response_len"][batch_index],
109 | _model.label: val_batches["label"][batch_index]
110 | }
111 |
112 | scores = sess.run(_model.logits, feed_dict = feed)
113 |
114 | for i in xrange(conf["batch_size"]):
115 | score_file.write(
116 | str(scores[i]) + '\t' +
117 | str(val_batches["label"][batch_index][i]) + '\n')
118 | score_file.close()
119 |
120 | #write evaluation result
121 | result = eva.evaluate(score_file_path)
122 | result_file_path = conf["save_path"] + "result." + str(index)
123 | with open(result_file_path, 'w') as out_file:
124 | for p_at in result:
125 | out_file.write(str(p_at) + '\n')
126 | print('finish evaluation')
127 | print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
128 |
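                # keep a checkpoint only when the summed validation metrics
                # (result[1] + result[2]) improve on the best seen so far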
129 | if result[1] + result[2] > best_result[1] + best_result[2]:
130 | best_result = result
131 | _save_path = _model.saver.save(sess, conf["save_path"] + "model.ckpt." + str(step / conf["save_step"]))
132 | print("succ saving model in " + _save_path)
133 | print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
134 |
135 |
136 |
137 |
--------------------------------------------------------------------------------
/DGU/dgu/scripts/build_atis_dataset.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """build swda train dev test dataset"""
17 |
18 | import json
19 | import sys
20 | import csv
21 | import os
22 | import io
23 | import re
24 |
25 |
26 | class ATIS(object):
27 | """
28 | nlu dataset atis data process
29 | """
30 | def __init__(self):
31 | """
32 | init instance
33 | """
34 | self.slot_id = 2
35 | self.slot_dict = {"PAD": 0, "O": 1}
36 | self.intent_id = 0
37 | self.intent_dict = dict()
38 | self.src_dir = "../../data/input/data/atis/source_data"
39 | self.out_slot_dir = "../../data/input/data/atis/atis_slot"
40 | self.out_intent_dir = "../../data/input/data/atis/atis_intent"
41 | self.map_tag_slot = "../../data/input/data/atis/atis_slot/map_tag_slot_id.txt"
42 | self.map_tag_intent = "../../data/input/data/atis/atis_intent/map_tag_intent_id.txt"
43 |
44 | def _load_file(self, data_type):
45 | """
46 | load dataset filename
47 | """
48 | slot_stat = os.path.exists(self.out_slot_dir)
49 | if not slot_stat:
50 | os.makedirs(self.out_slot_dir)
51 | intent_stat = os.path.exists(self.out_intent_dir)
52 | if not intent_stat:
53 | os.makedirs(self.out_intent_dir)
54 | src_examples = []
55 | json_file = os.path.join(self.src_dir, "%s.json" % data_type)
56 | load_f = io.open(json_file, 'r', encoding="utf8")
57 | json_dict = json.load(load_f)
58 | examples = json_dict['rasa_nlu_data']['common_examples']
59 | for example in examples:
60 | text = example.get('text')
61 | intent = example.get('intent')
62 | entities = example.get('entities')
63 | src_examples.append((text, intent, entities))
64 | return src_examples
65 |
66 | def _parser_intent_data(self, examples, data_type):
67 | """
68 |         parse the intent dataset
69 | """
70 | out_filename = "%s/%s.txt" % (self.out_intent_dir, data_type)
71 | fw = io.open(out_filename, 'w', encoding="utf8")
72 | for example in examples:
73 | if example[1] not in self.intent_dict:
74 | self.intent_dict[example[1]] = self.intent_id
75 | self.intent_id += 1
76 | fw.write(u"%s\t%s\n" % (self.intent_dict[example[1]], example[0].lower()))
77 |
78 | fw = io.open(self.map_tag_intent, 'w', encoding="utf8")
79 | for tag in self.intent_dict:
80 | fw.write(u"%s\t%s\n" % (tag, self.intent_dict[tag]))
81 |
82 | def _parser_slot_data(self, examples, data_type):
83 | """
84 |         parse the slot dataset
85 | """
86 | out_filename = "%s/%s.txt" % (self.out_slot_dir, data_type)
87 | fw = io.open(out_filename, 'w', encoding="utf8")
88 | for example in examples:
89 | tags = []
90 | text = example[0]
91 | entities = example[2]
92 |             if not entities:
93 |                 tags = [str(self.slot_dict['O'])] * len(text.strip().split())
94 |                 fw.write(u"%s\t%s\n" % (text.encode('utf8'), " ".join(tags).encode('utf8'))); continue  # write the all-O example instead of silently dropping it
95 | for i in range(len(entities)):
96 | enty = entities[i]
97 | start = enty['start']
98 | value_num = len(enty['value'].split())
99 | tags_slot = []
100 | for j in range(value_num):
101 | if j == 0:
102 | bround_tag = "B"
103 | else:
104 | bround_tag = "I"
105 | tag = "%s-%s" % (bround_tag, enty['entity'])
106 | if tag not in self.slot_dict:
107 | self.slot_dict[tag] = self.slot_id
108 | self.slot_id += 1
109 | tags_slot.append(str(self.slot_dict[tag]))
110 | if i == 0:
111 | if start not in [0, 1]:
112 | prefix_num = len(text[: start].strip().split())
113 | tags.extend([str(self.slot_dict['O'])] * prefix_num)
114 | tags.extend(tags_slot)
115 | else:
116 | prefix_num = len(text[entities[i - 1]['end']: start].strip().split())
117 | tags.extend([str(self.slot_dict['O'])] * prefix_num)
118 | tags.extend(tags_slot)
119 | if entities[-1]['end'] < len(text):
120 | suffix_num = len(text[entities[-1]['end']:].strip().split())
121 | tags.extend([str(self.slot_dict['O'])] * suffix_num)
122 | fw.write(u"%s\t%s\n" % (text.encode('utf8'), " ".join(tags).encode('utf8')))
123 |
124 | fw = io.open(self.map_tag_slot, 'w', encoding="utf8")
125 | for slot in self.slot_dict:
126 | fw.write(u"%s\t%s\n" % (slot, self.slot_dict[slot]))
127 |
128 | def get_train_dataset(self):
129 | """
130 |         parse the train dataset and write train.txt
131 | """
132 | train_examples = self._load_file("train")
133 | self._parser_intent_data(train_examples, "train")
134 | self._parser_slot_data(train_examples, "train")
135 |
136 | def get_test_dataset(self):
137 | """
138 |         parse the test dataset and write test.txt
139 | """
140 | test_examples = self._load_file("test")
141 | self._parser_intent_data(test_examples, "test")
142 | self._parser_slot_data(test_examples, "test")
143 |
144 | def main(self):
145 | """
146 | run data process
147 | """
148 | self.get_train_dataset()
149 | self.get_test_dataset()
150 |
151 |
152 | if __name__ == "__main__":
153 | atis_inst = ATIS()
154 | atis_inst.main()
155 |
156 |
157 |
158 |
159 |
--------------------------------------------------------------------------------
/DGU/dgu/define_paradigm.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """define network paradigm"""
15 |
16 | import sys
17 | import re
18 |
19 | import paddle
20 | import paddle.fluid as fluid
21 |
22 |
23 | class Paradigm(object):
24 | """
25 | define network paradigm
26 | """
27 |
28 | def __init__(self, task_name):
29 | """
30 | init
31 | """
32 | self.task_name = task_name
33 |
34 | def create_cls(self, transformer_inst, params):
35 | """
36 | create classify paradigm network
37 | """
38 | cls_feats = transformer_inst.get_pooled_output()
39 | cls_feats = fluid.layers.dropout(
40 | x=cls_feats,
41 | dropout_prob=0.1,
42 | dropout_implementation="upscale_in_train")
43 | logits = fluid.layers.fc(
44 | input=cls_feats,
45 | size=params['num_labels'],
46 | param_attr=fluid.ParamAttr(
47 | name="cls_out_w",
48 | initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
49 | bias_attr=fluid.ParamAttr(
50 | name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
51 |
52 | if not params['is_training']:
53 | probs = fluid.layers.softmax(logits)
54 | results = {"probs": probs}
55 | return results
56 |
57 | ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
58 | logits=logits, label=params['labels'], return_softmax=True)
59 | loss = fluid.layers.mean(x=ce_loss)
60 | num_seqs = fluid.layers.create_tensor(dtype='int64')
61 | accuracy = fluid.layers.accuracy(
62 | input=probs, label=params['labels'], total=num_seqs)
63 |
64 | results = {
65 | "loss": loss,
66 | "probs": probs,
67 | "accuracy": accuracy,
68 | "num_seqs": num_seqs
69 | }
70 | return results
71 |
72 | def create_multi_cls(self, transformer_inst, params):
73 | """
74 | create multi classify paradigm network
75 | """
76 | cls_feats = transformer_inst.get_pooled_output()
77 | cls_feats = fluid.layers.dropout(
78 | x=cls_feats,
79 | dropout_prob=0.1,
80 | dropout_implementation="upscale_in_train")
81 | logits = fluid.layers.fc(
82 | input=cls_feats,
83 | size=params['num_labels'],
84 | param_attr=fluid.ParamAttr(
85 | name="cls_out_w",
86 | initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
87 | bias_attr=fluid.ParamAttr(
88 | name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
89 |
90 | labels_onehot = fluid.layers.cast(params["labels"], dtype='float32')
91 | ce_loss = fluid.layers.reduce_sum(
92 | fluid.layers.sigmoid_cross_entropy_with_logits(
93 | x=logits, label=labels_onehot))
94 | loss = fluid.layers.mean(x=ce_loss)
95 | probs = fluid.layers.sigmoid(logits)
96 |
97 | if not params['is_training']:
98 | results = {"probs": probs}
99 | return results
100 |
101 | num_seqs = fluid.layers.tensor.fill_constant(
102 | shape=[1], dtype='int64', value=1)
103 |
104 | results = {"loss": loss, "probs": probs, "num_seqs": num_seqs}
105 | return results
106 |
107 | def create_sequence_tagging(self, transformer_inst, params):
108 | """
109 | create sequence tagging paradigm
110 | """
111 | output_layer = transformer_inst.get_sequence_output()
112 | hidden_size = output_layer.shape[-1]
113 | output_layer = fluid.layers.stack(output_layer, axis=1)
114 | output_layer = fluid.layers.reshape(output_layer, [-1, hidden_size])
115 |
116 | logits = fluid.layers.fc(input=output_layer, size=params['num_labels'])
117 | probs = fluid.layers.cast(
118 | fluid.layers.argmax(
119 | logits, axis=1), dtype='int32')
120 |
121 | if not params['is_training']:
122 | results = {"probs": probs}
123 | return results
124 |
125 | num_seqs = fluid.layers.tensor.fill_constant(
126 | shape=[1], dtype='int64', value=1)
127 | y_label_reshape = fluid.layers.cast(
128 | fluid.layers.reshape(params['labels'], [-1]), dtype='int32')
129 | correct_prediction = fluid.layers.equal(probs, y_label_reshape)
130 | accuracy = fluid.layers.mean(
131 | fluid.layers.cast(
132 | correct_prediction, dtype='float32'))
133 | ce_loss = fluid.layers.softmax_with_cross_entropy(logits=logits, \
134 | label=fluid.layers.reshape(params['labels'], [-1, 1]))
135 | loss = fluid.layers.mean(x=ce_loss)
136 |
137 | results = {
138 | "loss": loss,
139 | "probs": probs,
140 | "accuracy": accuracy,
141 | "num_seqs": num_seqs
142 | }
143 | return results
144 |
145 | def paradigm(self, transformer_inst, params):
146 | """
147 | run paradigm
148 | """
149 | results = None
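        # udc / swda / mrda / atis_intent share the single-label classifier;
        # atis_slot is sequence tagging, dstc2 is multi-label classification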
150 | if self.task_name == 'udc':
151 | results = self.create_cls(transformer_inst, params)
152 | elif self.task_name == 'swda':
153 | results = self.create_cls(transformer_inst, params)
154 | elif self.task_name == 'mrda':
155 | results = self.create_cls(transformer_inst, params)
156 | elif self.task_name == 'atis_intent':
157 | results = self.create_cls(transformer_inst, params)
158 | elif self.task_name == 'atis_slot':
159 | results = self.create_sequence_tagging(transformer_inst, params)
160 | elif self.task_name == 'dstc2':
161 | results = self.create_multi_cls(transformer_inst, params)
162 | return results
163 |
--------------------------------------------------------------------------------
/DGU/predict.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | import io
17 | import os
18 | import sys
19 | import numpy as np
20 | import argparse
21 | import collections
22 | import paddle
23 | import paddle.fluid as fluid
24 |
25 | import dgu.reader as reader
26 | from dgu_net import create_net
27 | import dgu.define_paradigm as define_paradigm
28 | import dgu.define_predict_pack as define_predict_pack
29 |
30 | from dgu.utils.configure import PDConfig
31 | from dgu.utils.input_field import InputField
32 | from dgu.utils.model_check import check_cuda
33 | import dgu.utils.save_load_io as save_load_io
34 | from dgu.utils.py23 import tab_tok, rt_tok
35 |
36 |
37 | def do_predict(args):
38 | """predict function"""
39 |
40 | task_name = args.task_name.lower()
41 | paradigm_inst = define_paradigm.Paradigm(task_name)
42 | pred_inst = define_predict_pack.DefinePredict()
43 | pred_func = getattr(pred_inst, pred_inst.task_map[task_name])
44 |
45 | processors = {
46 | 'udc': reader.UDCProcessor,
47 | 'swda': reader.SWDAProcessor,
48 | 'mrda': reader.MRDAProcessor,
49 | 'atis_slot': reader.ATISSlotProcessor,
50 | 'atis_intent': reader.ATISIntentProcessor,
51 | 'dstc2': reader.DSTC2Processor,
52 | }
53 |
54 | test_prog = fluid.default_main_program()
55 | startup_prog = fluid.default_startup_program()
56 |
57 | with fluid.program_guard(test_prog, startup_prog):
58 | test_prog.random_seed = args.random_seed
59 | startup_prog.random_seed = args.random_seed
60 |
61 | with fluid.unique_name.guard():
62 |
63 | # define inputs of the network
64 | num_labels = len(processors[task_name].get_labels())
65 |
66 | src_ids = fluid.data(
67 | name='src_ids', shape=[-1, args.max_seq_len], dtype='int64')
68 | pos_ids = fluid.data(
69 | name='pos_ids', shape=[-1, args.max_seq_len], dtype='int64')
70 | sent_ids = fluid.data(
71 | name='sent_ids', shape=[-1, args.max_seq_len], dtype='int64')
72 | input_mask = fluid.data(
73 | name='input_mask',
74 | shape=[-1, args.max_seq_len],
75 | dtype='float32')
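            # the label shape depends on the task: one label per token for
            # slot tagging, a multi-hot vector for dstc2, else a single label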
76 | if args.task_name == 'atis_slot':
77 | labels = fluid.data(
78 | name='labels', shape=[-1, args.max_seq_len], dtype='int64')
79 | elif args.task_name in ['dstc2', 'dstc2_asr', 'multi-woz']:
80 | labels = fluid.data(
81 | name='labels', shape=[-1, num_labels], dtype='int64')
82 | else:
83 | labels = fluid.data(name='labels', shape=[-1, 1], dtype='int64')
84 |
85 | input_inst = [src_ids, pos_ids, sent_ids, input_mask, labels]
86 | input_field = InputField(input_inst)
87 | data_reader = fluid.io.PyReader(
88 | feed_list=input_inst, capacity=4, iterable=False)
89 |
90 | results = create_net(
91 | is_training=False,
92 | model_input=input_field,
93 | num_labels=num_labels,
94 | paradigm_inst=paradigm_inst,
95 | args=args)
96 |
97 | probs = results.get("probs", None)
98 |
99 | probs.persistable = True
100 |
101 | fetch_list = [probs.name]
102 |
103 |     # for_test=True: clone the program with the is_test attribute of its operators set to True
104 | test_prog = test_prog.clone(for_test=True)
105 |
106 | if args.use_cuda:
107 | place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
108 | else:
109 | place = fluid.CPUPlace()
110 |
111 | exe = fluid.Executor(place)
112 | exe.run(startup_prog)
113 |
114 | assert (args.init_from_params) or (args.init_from_pretrain_model)
115 |
116 | if args.init_from_params:
117 | save_load_io.init_from_params(args, exe, test_prog)
118 | if args.init_from_pretrain_model:
119 | save_load_io.init_from_pretrain_model(args, exe, test_prog)
120 |
121 | compiled_test_prog = fluid.CompiledProgram(test_prog)
122 |
123 | processor = processors[task_name](data_dir=args.data_dir,
124 | vocab_path=args.vocab_path,
125 | max_seq_len=args.max_seq_len,
126 | do_lower_case=args.do_lower_case,
127 | in_tokens=args.in_tokens,
128 | task_name=task_name,
129 | random_seed=args.random_seed)
130 | batch_generator = processor.data_generator(
131 | batch_size=args.batch_size, phase='test', shuffle=False)
132 |
133 | data_reader.decorate_batch_generator(batch_generator)
134 | data_reader.start()
135 |
136 | all_results = []
137 | while True:
138 | try:
139 | results = exe.run(compiled_test_prog, fetch_list=fetch_list)
140 | all_results.extend(results[0])
141 | except fluid.core.EOFException:
142 | data_reader.reset()
143 | break
144 |
145 | np.set_printoptions(precision=4, suppress=True)
146 | print("Write the predicted results into the output_prediction_file")
147 |
148 | fw = io.open(args.output_prediction_file, 'w', encoding="utf8")
149 | if task_name not in ['atis_slot']:
150 | for index, result in enumerate(all_results):
151 | tags = pred_func(result)
152 | fw.write("%s%s%s%s" % (index, tab_tok, tags, rt_tok))
153 | else:
154 | tags = pred_func(all_results, args.max_seq_len)
155 | for index, tag in enumerate(tags):
156 | fw.write("%s%s%s%s" % (index, tab_tok, tag, rt_tok))
157 |
158 |
159 | if __name__ == "__main__":
160 |
161 | args = PDConfig(yaml_file="./data/config/dgu.yaml")
162 | args.build()
163 | args.Print()
164 |
165 | check_cuda(args.use_cuda)
166 |
167 | do_predict(args)
168 |
--------------------------------------------------------------------------------
/ADE/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | export FLAGS_sync_nccl_allreduce=0
4 | export FLAGS_eager_delete_tensor_gb=1.0
5 |
6 | export CUDA_VISIBLE_DEVICES=0
7 |
8 | if [ $# -ne 2 ]
9 | then
10 | echo "please input parameters: TRAIN_TYPE and TASK_TYPE"
11 | echo "TRAIN_TYPE: [matching|seq2seq_naive|seq2seq_att|keywords|human]"
12 | echo "TASK_TYPE: [train|predict|evaluate|inference]"
13 | exit 255
14 | fi
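#usage example: sh run.sh matching train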
15 |
16 | TRAIN_TYPE=$1
17 | TASK_TYPE=$2
18 |
19 | candi_train_type=("matching" "seq2seq_naive" "seq2seq_att" "keywords" "human")
20 | candi_task_type=("train" "predict" "evaluate" "inference")
21 |
22 | if [[ ! "${candi_train_type[@]}" =~ ${TRAIN_TYPE} ]]
23 | then
24 | echo "unknown parameter: ${TRAIN_TYPE}, just support [matching|seq2seq_naive|seq2seq_att|keywords|human]"
25 | exit 255
26 | fi
27 |
28 | if [[ ! "${candi_task_type[@]}" =~ ${TASK_TYPE} ]]
29 | then
30 | echo "unknown parameter: ${TRAIN_TYPE}, just support [train|predict|evaluate|inference]"
31 | exit 255
32 | fi
33 |
34 | INPUT_PATH="data/input/data"
35 | OUTPUT_PATH="data/output"
36 | SAVED_MODELS="data/saved_models"
37 | INFERENCE_MODEL="data/inference_models"
38 | PYTHON_PATH="python"
39 |
40 | #detect the available device: use GPU when CUDA_VISIBLE_DEVICES is set, otherwise CPU
41 | if [ ! "$CUDA_VISIBLE_DEVICES" ]
42 | then
43 | export CPU_NUM=1
44 | use_cuda=false
45 | else
46 | use_cuda=true
47 | fi
48 |
49 | #training
50 | function pretrain_train()
51 | {
52 |
53 | pretrain_model_path="${SAVED_MODELS}/matching_pretrained"
54 | if [ -f ${pretrain_model_path} ]
55 | then
56 | rm ${pretrain_model_path}
57 | fi
58 |
59 | if [ ! -d ${pretrain_model_path} ]
60 | then
61 | mkdir ${pretrain_model_path}
62 | fi
63 |
64 | ${PYTHON_PATH} -u main.py \
65 | --do_train=true \
66 | --use_cuda=${1} \
67 | --loss_type="CLS" \
68 | --max_seq_len=50 \
69 | --save_model_path=${pretrain_model_path} \
70 | --save_param="params" \
71 | --training_file="${INPUT_PATH}/unlabel_data/train.ids" \
72 | --epoch=20 \
73 | --print_step=1 \
74 | --save_step=400 \
75 | --batch_size=256 \
76 | --hidden_size=256 \
77 | --emb_size=256 \
78 | --vocab_size=484016 \
79 | --learning_rate=0.001 \
80 | --sample_pro=0.1
81 | }
82 |
83 | function finetuning_train()
84 | {
85 | save_model_path="${SAVED_MODELS}/${2}_finetuned"
86 |
87 | if [ -f ${save_model_path} ]
88 | then
89 | rm ${save_model_path}
90 | fi
91 |
92 | if [ ! -d ${save_model_path} ]
93 | then
94 | mkdir ${save_model_path}
95 | fi
96 |
97 | ${PYTHON_PATH} -u main.py \
98 | --do_train=true \
99 | --use_cuda=${1} \
100 | --loss_type="L2" \
101 | --max_seq_len=50 \
102 | --init_from_pretrain_model="${SAVED_MODELS}/matching_pretrained/params/step_final" \
103 | --save_model_path=${save_model_path} \
104 | --save_param="params" \
105 | --training_file="${INPUT_PATH}/label_data/${2}/train.ids" \
106 | --epoch=50 \
107 | --print_step=1 \
108 | --save_step=400 \
109 | --batch_size=256 \
110 | --hidden_size=256 \
111 | --emb_size=256 \
112 | --vocab_size=484016 \
113 | --learning_rate=0.001 \
114 | --sample_pro=0.1
115 | }
116 |
117 | #predict
118 | function pretrain_predict()
119 | {
120 | ${PYTHON_PATH} -u main.py \
121 | --do_predict=true \
122 | --use_cuda=${1} \
123 | --predict_file="${INPUT_PATH}/unlabel_data/test.ids" \
124 | --init_from_params="${SAVED_MODELS}/trained_models/matching_pretrained/params" \
125 | --loss_type="CLS" \
126 | --output_prediction_file="${OUTPUT_PATH}/pretrain_matching_predict" \
127 | --max_seq_len=50 \
128 | --batch_size=256 \
129 | --hidden_size=256 \
130 | --emb_size=256 \
131 | --vocab_size=484016
132 | }
133 |
134 | function finetuning_predict()
135 | {
136 | ${PYTHON_PATH} -u main.py \
137 | --do_predict=true \
138 | --use_cuda=${1} \
139 | --predict_file="${INPUT_PATH}/label_data/${2}/test.ids" \
140 | --init_from_params=${SAVED_MODELS}/trained_models/${2}_finetuned/params \
141 | --loss_type="L2" \
142 | --output_prediction_file="${OUTPUT_PATH}/finetuning_${2}_predict" \
143 | --max_seq_len=50 \
144 | --batch_size=256 \
145 | --hidden_size=256 \
146 | --emb_size=256 \
147 | --vocab_size=484016
148 | }
149 |
150 | #evaluate
151 | function pretrain_eval()
152 | {
153 | ${PYTHON_PATH} -u main.py \
154 | --do_eval=true \
155 | --use_cuda=${1} \
156 | --evaluation_file="${INPUT_PATH}/unlabel_data/test.ids" \
157 | --output_prediction_file="${OUTPUT_PATH}/pretrain_matching_predict" \
158 | --loss_type="CLS"
159 | }
160 |
161 | function finetuning_eval()
162 | {
163 | ${PYTHON_PATH} -u main.py \
164 | --do_eval=true \
165 | --use_cuda=${1} \
166 | --evaluation_file="${INPUT_PATH}/label_data/${2}/test.ids" \
167 | --output_prediction_file="${OUTPUT_PATH}/finetuning_${2}_predict" \
168 | --loss_type="L2"
169 | }
170 |
171 | #inference model
172 | function pretrain_infer()
173 | {
174 | ${PYTHON_PATH} -u main.py \
175 | --do_save_inference_model=true \
176 | --use_cuda=${1} \
177 | --init_from_params="${SAVED_MODELS}/trained_models/matching_pretrained/params" \
178 | --inference_model_dir="${INFERENCE_MODEL}/matching_inference_model"
179 |
180 | }
181 | function finetuning_infer()
182 | {
183 | ${PYTHON_PATH} -u main.py \
184 | --do_save_inference_model=true \
185 | --use_cuda=${1} \
186 | --init_from_params="${SAVED_MODELS}/trained_models/${2}_finetuned/params" \
187 | --inference_model_dir="${INFERENCE_MODEL}/${2}_inference_model"
188 | }
189 |
190 | if [ "${TASK_TYPE}" = "train" ]
191 | then
192 | echo "train ${TRAIN_TYPE} start.........."
193 | if [ "${TRAIN_TYPE}" = "matching" ]
194 | then
195 | pretrain_train ${use_cuda};
196 | else
197 | finetuning_train ${use_cuda} ${TRAIN_TYPE};
198 | fi
199 | elif [ "${TASK_TYPE}" = "predict" ]
200 | then
201 | echo "predict ${TRAIN_TYPE} start.........."
202 | if [ "${TRAIN_TYPE}" = "matching" ]
203 | then
204 | pretrain_predict ${use_cuda};
205 | else
206 | finetuning_predict ${use_cuda} ${TRAIN_TYPE};
207 | fi
208 | elif [ "${TASK_TYPE}" = "evaluate" ]
209 | then
210 | echo "evaluate ${TRAIN_TYPE} start.........."
211 | if [ "${TRAIN_TYPE}" = "matching" ]
212 | then
213 | pretrain_eval ${use_cuda};
214 | else
215 | finetuning_eval ${use_cuda} ${TRAIN_TYPE};
216 | fi
217 | elif [ "${TASK_TYPE}" = "inference" ]
218 | then
219 | echo "save ${TRAIN_TYPE} inference model start.........."
220 | if [ "${TRAIN_TYPE}" = "matching" ]
221 | then
222 | pretrain_infer ${use_cuda};
223 | else
224 | finetuning_infer ${use_cuda} ${TRAIN_TYPE};
225 | fi
226 | else
227 | exit 255
228 | fi
229 |
230 |
--------------------------------------------------------------------------------
/DAM/models/self_match_net.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | import cPickle as pickle
4 |
5 | import utils.layers as layers
6 | import utils.operations as op
7 |
8 | class Net(object):
9 |     '''Self-attention matching variant: positional encoding (initializer
10 |     lambda is 0), stacked self-attention, integrated CNN and gradient clipping by value.
11 | 
12 |     Attributes:
13 |         conf: a configuration parameters dict
14 | word_embedding_init: a 2-d array with shape [vocab_size+1, emb_size]
15 | '''
16 | def __init__(self, conf):
17 | self._graph = tf.Graph()
18 | self._conf = conf
19 |
20 | if self._conf['word_emb_init'] is not None:
21 | print('loading word emb init')
22 | self._word_embedding_init = pickle.load(open(self._conf['word_emb_init'], 'rb'))
23 | else:
24 | self._word_embedding_init = None
25 |
26 | def build_graph(self):
27 | with self._graph.as_default():
28 | rand_seed = self._conf['rand_seed']
29 | tf.set_random_seed(rand_seed)
30 |
31 | #word embedding
32 | if self._word_embedding_init is not None:
33 | word_embedding_initializer = tf.constant_initializer(self._word_embedding_init)
34 | else:
35 | word_embedding_initializer = tf.random_normal_initializer(stddev=0.1)
36 |
37 | self._word_embedding = tf.get_variable(
38 | name='word_embedding',
39 | shape=[self._conf['vocab_size']+1, self._conf['emb_size']],
40 | dtype=tf.float32,
41 | initializer=word_embedding_initializer)
42 |
43 |
44 |             #define placeholders
45 | self.turns = tf.placeholder(
46 | tf.int32,
47 | shape=[self._conf["batch_size"], self._conf["max_turn_num"], self._conf["max_turn_len"]])
48 |
49 | self.tt_turns_len = tf.placeholder(
50 | tf.int32,
51 | shape=[self._conf["batch_size"]])
52 |
53 | self.every_turn_len = tf.placeholder(
54 | tf.int32,
55 | shape=[self._conf["batch_size"], self._conf["max_turn_num"]])
56 |
57 | self.response = tf.placeholder(
58 | tf.int32,
59 | shape=[self._conf["batch_size"], self._conf["max_turn_len"]])
60 |
61 | self.response_len = tf.placeholder(
62 | tf.int32,
63 | shape=[self._conf["batch_size"]])
64 |
65 | self.label = tf.placeholder(
66 | tf.float32,
67 | shape=[self._conf["batch_size"]])
68 |
69 |
70 | #define operations
71 | #response part
72 | Hr = tf.nn.embedding_lookup(self._word_embedding, self.response)
73 | #Hr_stack = [Hr]
74 |
75 | if self._conf['is_positional'] and self._conf['stack_num'] > 0:
76 | with tf.variable_scope('positional'):
77 | Hr = op.positional_encoding_vector(Hr, max_timescale=10)
78 | Hr_stack = [Hr]
79 |
80 | for index in range(self._conf['stack_num']):
81 | with tf.variable_scope('self_stack_' + str(index)):
82 | Hr = layers.block(
83 | Hr, Hr, Hr,
84 | Q_lengths=self.response_len, K_lengths=self.response_len)
85 | Hr_stack.append(Hr)
86 |
87 | Hr_stack = tf.stack(Hr_stack, axis=-1)
88 |
89 |
90 | #context part
91 | #a list of length max_turn_num, every element is a tensor with shape [batch, max_turn_len]
92 | list_turn_t = tf.unstack(self.turns, axis=1)
93 | list_turn_length = tf.unstack(self.every_turn_len, axis=1)
94 |
95 | sim_turns = []
96 | #for every turn_t calculate matching vector
97 | for turn_t, t_turn_length in zip(list_turn_t, list_turn_length):
98 | Hu = tf.nn.embedding_lookup(self._word_embedding, turn_t) #[batch, max_turn_len, emb_size]
99 | #Hu_stack = [Hu]
100 |
101 | if self._conf['is_positional'] and self._conf['stack_num'] > 0:
102 | with tf.variable_scope('positional', reuse=True):
103 | Hu = op.positional_encoding_vector(Hu, max_timescale=10)
104 | Hu_stack = [Hu]
105 |
106 |
107 | for index in range(self._conf['stack_num']):
108 |
109 | with tf.variable_scope('self_stack_' + str(index), reuse=True):
110 | Hu = layers.block(
111 | Hu, Hu, Hu,
112 | Q_lengths=t_turn_length, K_lengths=t_turn_length)
113 |
114 | Hu_stack.append(Hu)
115 |
116 |
117 | Hu_stack = tf.stack(Hu_stack, axis=-1)
118 | #print('Hu_stack shape: %s' %Hu_stack.shape)
119 |
120 | #calculate similarity matrix
121 | with tf.variable_scope('similarity'):
122 |                     # sim shape [batch, max_turn_len, max_turn_len, stack_num+1]
123 | # divide sqrt(200) to prevent gradient explosion
124 | sim = tf.einsum('biks,bjks->bijs', Hu_stack, Hr_stack) / tf.sqrt(200.0)
125 |
126 | sim_turns.append(sim)
127 |
128 |
129 | #cnn and aggregation
130 | sim = tf.stack(sim_turns, axis=1)
131 | print('sim shape: %s' %sim.shape)
132 | with tf.variable_scope('cnn_aggregation'):
133 | final_info = layers.CNN_3d(sim, 32, 16)
134 | #for douban
135 | #final_info = layers.CNN_3d(sim, 16, 16)
136 |
137 |
138 | #loss and train
139 | with tf.variable_scope('loss'):
140 | self.loss, self.logits = layers.loss(final_info, self.label)
141 |
142 | self.global_step = tf.Variable(0, trainable=False)
143 | initial_learning_rate = self._conf['learning_rate']
144 | self.learning_rate = tf.train.exponential_decay(
145 | initial_learning_rate,
146 | global_step=self.global_step,
147 | decay_steps=400,
148 | decay_rate=0.9,
149 | staircase=True)
150 |
151 | Optimizer = tf.train.AdamOptimizer(self.learning_rate)
152 | self.optimizer = Optimizer.minimize(self.loss)
153 |
154 | self.init = tf.global_variables_initializer()
155 | self.saver = tf.train.Saver(max_to_keep = self._conf["max_to_keep"])
156 | self.all_variables = tf.global_variables()
157 | self.all_operations = self._graph.get_operations()
158 | self.grads_and_vars = Optimizer.compute_gradients(self.loss)
159 |
160 | for grad, var in self.grads_and_vars:
161 | if grad is None:
162 |                     print(var)
163 |
164 | self.capped_gvs = [(tf.clip_by_value(grad, -1, 1), var) for grad, var in self.grads_and_vars]
165 | self.g_updates = Optimizer.apply_gradients(
166 | self.capped_gvs,
167 | global_step=self.global_step)
168 |
169 | return self._graph
170 |
171 |
--------------------------------------------------------------------------------
/ADE/train.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """train auto dialogue evaluation task"""
16 | import io
17 | import os
18 | import sys
19 | import six
20 | import time
21 | import numpy as np
22 |
23 | import paddle
24 | import paddle.fluid as fluid
25 |
26 | import ade.reader as reader
27 | from ade_net import create_net, set_word_embedding
28 |
29 | from ade.utils.configure import PDConfig
30 | from ade.utils.input_field import InputField
31 | from ade.utils.model_check import check_cuda
32 | import ade.utils.save_load_io as save_load_io
33 |
34 | try:
35 | import cPickle as pickle #python 2
36 | except ImportError as e:
37 | import pickle #python 3
38 |
39 |
40 | def do_train(args):
41 | """train function"""
42 |
43 | train_prog = fluid.default_main_program()
44 | startup_prog = fluid.default_startup_program()
45 |
46 | with fluid.program_guard(train_prog, startup_prog):
47 | train_prog.random_seed = args.random_seed
48 | startup_prog.random_seed = args.random_seed
49 |
50 | with fluid.unique_name.guard():
51 | context_wordseq = fluid.data(
52 | name='context_wordseq', shape=[-1, 1], dtype='int64', lod_level=1)
53 | response_wordseq = fluid.data(
54 | name='response_wordseq', shape=[-1, 1], dtype='int64', lod_level=1)
55 | labels = fluid.data(
56 | name='labels', shape=[-1, 1], dtype='int64')
57 |
58 | input_inst = [context_wordseq, response_wordseq, labels]
59 | input_field = InputField(input_inst)
60 | data_reader = fluid.io.PyReader(feed_list=input_inst,
61 | capacity=4, iterable=False)
62 |
63 | loss = create_net(
64 | is_training=True,
65 | model_input=input_field,
66 | args=args
67 | )
68 | loss.persistable = True
69 | # gradient clipping
70 | fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByValue(
71 | max=1.0, min=-1.0))
72 | optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
73 | optimizer.minimize(loss)
74 |
75 | if args.use_cuda:
76 | dev_count = fluid.core.get_cuda_device_count()
77 | place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
78 | else:
79 | dev_count = int(os.environ.get('CPU_NUM', 1))
80 | place = fluid.CPUPlace()
81 |
82 | processor = reader.DataProcessor(
83 | data_path=args.training_file,
84 | max_seq_length=args.max_seq_len,
85 | batch_size=args.batch_size)
86 |
87 | batch_generator = processor.data_generator(
88 | place=place,
89 | phase="train",
90 | shuffle=True,
91 | sample_pro=args.sample_pro)
92 |
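    # every training step consumes dev_count * batch_size examples in parallel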
93 | num_train_examples = processor.get_num_examples(phase='train')
94 | max_train_steps = args.epoch * num_train_examples // dev_count // args.batch_size
95 |
96 | print("Num train examples: %d" % num_train_examples)
97 | print("Max train steps: %d" % max_train_steps)
98 |
99 | data_reader.decorate_batch_generator(batch_generator)
100 |
101 | exe = fluid.Executor(place)
102 | exe.run(startup_prog)
103 |
104 | assert (args.init_from_checkpoint == "") or (
105 | args.init_from_pretrain_model == "")
106 |
107 | #init from some checkpoint, to resume the previous training
108 | if args.init_from_checkpoint:
109 | save_load_io.init_from_checkpoint(args, exe, train_prog)
110 | #init from some pretrain models, to better solve the current task
111 | if args.init_from_pretrain_model:
112 | save_load_io.init_from_pretrain_model(args, exe, train_prog)
113 |
114 | if args.word_emb_init:
115 | print("start loading word embedding init ...")
116 | if six.PY2:
117 | word_emb = np.array(pickle.load(io.open(args.word_emb_init, 'rb'))).astype('float32')
118 | else:
119 | word_emb = np.array(pickle.load(io.open(args.word_emb_init, 'rb'), encoding="bytes")).astype('float32')
120 | set_word_embedding(word_emb, place)
121 | print("finish init word embedding ...")
122 |
123 | build_strategy = fluid.compiler.BuildStrategy()
124 | build_strategy.enable_inplace = True
125 |
126 | compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
127 | loss_name=loss.name, build_strategy=build_strategy)
128 |
129 | steps = 0
130 | begin_time = time.time()
131 | time_begin = time.time()
132 |
133 | for epoch_step in range(args.epoch):
134 | data_reader.start()
135 | sum_loss = 0.0
136 | ce_loss = 0.0
137 | while True:
138 | try:
139 | fetch_list = [loss.name]
140 | outputs = exe.run(compiled_train_prog, fetch_list=fetch_list)
141 | np_loss = outputs
142 | sum_loss += np.array(np_loss).mean()
143 | ce_loss = np.array(np_loss).mean()
144 |
145 | if steps % args.print_steps == 0:
146 | time_end = time.time()
147 | used_time = time_end - time_begin
148 | current_time = time.strftime('%Y-%m-%d %H:%M:%S',
149 | time.localtime(time.time()))
150 | print('%s epoch: %d, step: %s, avg loss %s, speed: %f steps/s' % (current_time, epoch_step, steps, sum_loss / args.print_steps, args.print_steps / used_time))
151 | sum_loss = 0.0
152 | time_begin = time.time()
153 |
154 | if steps % args.save_steps == 0:
155 | if args.save_checkpoint:
156 | save_load_io.save_checkpoint(args, exe, train_prog, "step_" + str(steps))
157 | if args.save_param:
158 | save_load_io.save_param(args, exe, train_prog, "step_" + str(steps))
159 | steps += 1
160 | except fluid.core.EOFException:
161 | data_reader.reset()
162 | break
163 |
164 | if args.save_checkpoint:
165 | save_load_io.save_checkpoint(args, exe, train_prog, "step_final")
166 | if args.save_param:
167 | save_load_io.save_param(args, exe, train_prog, "step_final")
168 |
169 | def get_cards():
170 | num = 0
171 | cards = os.environ.get('CUDA_VISIBLE_DEVICES', '')
172 | if cards != '':
173 | num = len(cards.split(","))
174 | return num
175 |
176 | if args.enable_ce:
177 | card_num = get_cards()
178 | pass_time_cost = time.time() - begin_time
179 | print("test_card_num", card_num)
180 | print("kpis\ttrain_duration_card%s\t%s" % (card_num, pass_time_cost))
181 | print("kpis\ttrain_loss_card%s\t%f" % (card_num, ce_loss))
182 |
183 |
184 | if __name__ == '__main__':
185 |
186 | args = PDConfig(yaml_file="./data/config/ade.yaml")
187 | args.build()
188 | args.Print()
189 |
190 | check_cuda(args.use_cuda)
191 |
192 | do_train(args)
193 |
--------------------------------------------------------------------------------
/DAM/models/last_net.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | import cPickle as pickle
4 |
5 | import utils.layers as layers
6 | import utils.operations as op
7 |
8 | class Net(object):
9 |     '''Variant with cross-attention applied only after the last
10 |     self-attention stack; positional encoding (initializer lambda is 0), integrated CNN and gradient clipping by value.
11 | 
12 |     Attributes:
13 |         conf: a configuration parameters dict
14 | word_embedding_init: a 2-d array with shape [vocab_size+1, emb_size]
15 | '''
16 | def __init__(self, conf):
17 | self._graph = tf.Graph()
18 | self._conf = conf
19 |
20 | if self._conf['word_emb_init'] is not None:
21 | print('loading word emb init')
22 | self._word_embedding_init = pickle.load(open(self._conf['word_emb_init'], 'rb'))
23 | else:
24 | self._word_embedding_init = None
25 |
26 | def build_graph(self):
27 | with self._graph.as_default():
28 | rand_seed = self._conf['rand_seed']
29 | tf.set_random_seed(rand_seed)
30 |
31 | #word embedding
32 | if self._word_embedding_init is not None:
33 | word_embedding_initializer = tf.constant_initializer(self._word_embedding_init)
34 | else:
35 | word_embedding_initializer = tf.random_normal_initializer(stddev=0.1)
36 |
37 | self._word_embedding = tf.get_variable(
38 | name='word_embedding',
39 | shape=[self._conf['vocab_size']+1, self._conf['emb_size']],
40 | dtype=tf.float32,
41 | initializer=word_embedding_initializer)
42 |
43 |
44 |             #define placeholders
45 | self.turns = tf.placeholder(
46 | tf.int32,
47 | shape=[self._conf["batch_size"], self._conf["max_turn_num"], self._conf["max_turn_len"]])
48 |
49 | self.tt_turns_len = tf.placeholder(
50 | tf.int32,
51 | shape=[self._conf["batch_size"]])
52 |
53 | self.every_turn_len = tf.placeholder(
54 | tf.int32,
55 | shape=[self._conf["batch_size"], self._conf["max_turn_num"]])
56 |
57 | self.response = tf.placeholder(
58 | tf.int32,
59 | shape=[self._conf["batch_size"], self._conf["max_turn_len"]])
60 |
61 | self.response_len = tf.placeholder(
62 | tf.int32,
63 | shape=[self._conf["batch_size"]])
64 |
65 | self.label = tf.placeholder(
66 | tf.float32,
67 | shape=[self._conf["batch_size"]])
68 |
69 |
70 | #define operations
71 | #response part
72 | Hr = tf.nn.embedding_lookup(self._word_embedding, self.response)
73 |
74 | if self._conf['is_positional'] and self._conf['stack_num'] > 0:
75 | with tf.variable_scope('positional'):
76 | Hr = op.positional_encoding_vector(Hr, max_timescale=10)
77 |
78 | for index in range(self._conf['stack_num']):
79 | with tf.variable_scope('self_stack_' + str(index)):
80 | Hr = layers.block(
81 | Hr, Hr, Hr,
82 | Q_lengths=self.response_len, K_lengths=self.response_len)
83 |
84 | #context part
85 | #a list of length max_turn_num, every element is a tensor with shape [batch, max_turn_len]
86 | list_turn_t = tf.unstack(self.turns, axis=1)
87 | list_turn_length = tf.unstack(self.every_turn_len, axis=1)
88 |
89 | sim_turns = []
90 | #for every turn_t calculate matching vector
91 | for turn_t, t_turn_length in zip(list_turn_t, list_turn_length):
92 | Hu = tf.nn.embedding_lookup(self._word_embedding, turn_t) #[batch, max_turn_len, emb_size]
93 |
94 | if self._conf['is_positional'] and self._conf['stack_num'] > 0:
95 | with tf.variable_scope('positional', reuse=True):
96 | Hu = op.positional_encoding_vector(Hu, max_timescale=10)
97 |
98 | for index in range(self._conf['stack_num']):
99 |
100 | with tf.variable_scope('self_stack_' + str(index), reuse=True):
101 | Hu = layers.block(
102 | Hu, Hu, Hu,
103 | Q_lengths=t_turn_length, K_lengths=t_turn_length)
104 |
105 |
106 |
107 | with tf.variable_scope('u_attentd_r_' + str(index)):
108 | try:
109 | u_a_r = layers.block(
110 | Hu, Hr, Hr,
111 | Q_lengths=t_turn_length, K_lengths=self.response_len)
112 | except ValueError:
113 | tf.get_variable_scope().reuse_variables()
114 | u_a_r = layers.block(
115 | Hu, Hr, Hr,
116 | Q_lengths=t_turn_length, K_lengths=self.response_len)
117 |
118 |
119 | with tf.variable_scope('r_attend_u_' + str(index)):
120 | try:
121 | r_a_u = layers.block(
122 | Hr, Hu, Hu,
123 | Q_lengths=self.response_len, K_lengths=t_turn_length)
124 | except ValueError:
125 | tf.get_variable_scope().reuse_variables()
126 | r_a_u = layers.block(
127 | Hr, Hu, Hu,
128 | Q_lengths=self.response_len, K_lengths=t_turn_length)
129 |
130 | u_a_r = tf.stack([u_a_r, Hu], axis=-1)
131 | r_a_u = tf.stack([r_a_u, Hr], axis=-1)
132 |
133 | #calculate similarity matrix
134 | with tf.variable_scope('similarity'):
135 |                     # sim shape [batch, max_turn_len, max_turn_len, 2]
136 | # divide sqrt(200) to prevent gradient explosion
137 | sim = tf.einsum('biks,bjks->bijs', r_a_u, u_a_r) / tf.sqrt(200.0)
138 |
139 | sim_turns.append(sim)
140 |
141 |
142 | #cnn and aggregation
143 | sim = tf.stack(sim_turns, axis=1)
144 | print('sim shape: %s' %sim.shape)
145 | with tf.variable_scope('cnn_aggregation'):
146 | final_info = layers.CNN_3d(sim, 32, 16)
147 | #for douban
148 | #final_info = layers.CNN_3d(sim, 16, 16)
149 |
150 | #loss and train
151 | with tf.variable_scope('loss'):
152 | self.loss, self.logits = layers.loss(final_info, self.label)
153 |
154 | self.global_step = tf.Variable(0, trainable=False)
155 | initial_learning_rate = self._conf['learning_rate']
156 | self.learning_rate = tf.train.exponential_decay(
157 | initial_learning_rate,
158 | global_step=self.global_step,
159 | decay_steps=400,
160 | decay_rate=0.9,
161 | staircase=True)
162 |
163 | Optimizer = tf.train.AdamOptimizer(self.learning_rate)
164 | self.optimizer = Optimizer.minimize(self.loss)
165 |
166 | self.init = tf.global_variables_initializer()
167 | self.saver = tf.train.Saver(max_to_keep = self._conf["max_to_keep"])
168 | self.all_variables = tf.global_variables()
169 | self.all_operations = self._graph.get_operations()
170 | self.grads_and_vars = Optimizer.compute_gradients(self.loss)
171 |
172 | for grad, var in self.grads_and_vars:
173 | if grad is None:
174 | print(var)  # flag any variable that received no gradient
175 |
176 | self.capped_gvs = [(tf.clip_by_value(grad, -1, 1), var) for grad, var in self.grads_and_vars]
177 | self.g_updates = Optimizer.apply_gradients(
178 | self.capped_gvs,
179 | global_step=self.global_step)
180 |
181 | return self._graph
182 |
183 |
--------------------------------------------------------------------------------
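A note on the schedule used in the training tail above: tf.train.exponential_decay with staircase=True keeps the learning rate piecewise constant, multiplying it by 0.9 once every 400 steps, and gradients are additionally clipped element-wise to [-1, 1] before being applied. A minimal plain-Python sketch of the decay (the initial rate comes from conf['learning_rate']; 1e-3 below is only an illustrative assumption):

def staircase_lr(initial_lr, global_step, decay_steps=400, decay_rate=0.9):
    # staircase=True: the exponent advances only at whole multiples of
    # decay_steps, so the rate is constant within each 400-step stage
    return initial_lr * decay_rate ** (global_step // decay_steps)

assert staircase_lr(1e-3, 399) == 1e-3                 # still in the first stage
assert abs(staircase_lr(1e-3, 800) - 8.1e-4) < 1e-12   # decayed twice: 1e-3 * 0.9**2
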
/DGU/dgu/batching.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Mask, padding and batching."""
15 |
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 |
20 | import numpy as np
21 |
22 |
23 | def mask(batch_tokens, total_token_num, vocab_size, CLS=1, SEP=2, MASK=3):
24 | """
25 | Add masks to batch_tokens; return the masked tokens, mask_label and mask_pos.
26 | Note: mask_pos indexes into batch_tokens after padding.
27 | """
28 | max_len = max([len(sent) for sent in batch_tokens])
29 | mask_label = []
30 | mask_pos = []
31 | prob_mask = np.random.rand(total_token_num)
32 | # Note: the first token is [CLS], so [low=1]
33 | replace_ids = np.random.randint(1, high=vocab_size, size=total_token_num)
34 | pre_sent_len = 0
35 | prob_index = 0
36 | for sent_index, sent in enumerate(batch_tokens):
37 | mask_flag = False
38 | prob_index += pre_sent_len
39 | for token_index, token in enumerate(sent):
40 | prob = prob_mask[prob_index + token_index]
41 | if prob > 0.15:
42 | continue
43 | elif 0.03 < prob <= 0.15:
44 | # mask: replace with [MASK] (~80% of the sampled ~15% of tokens)
45 | if token != SEP and token != CLS:
46 | mask_label.append(sent[token_index])
47 | sent[token_index] = MASK
48 | mask_flag = True
49 | mask_pos.append(sent_index * max_len + token_index)
50 | elif 0.015 < prob <= 0.03:
51 | # random replace: substitute a random token id (~10% of the sampled tokens)
52 | if token != SEP and token != CLS:
53 | mask_label.append(sent[token_index])
54 | sent[token_index] = replace_ids[prob_index + token_index]
55 | mask_flag = True
56 | mask_pos.append(sent_index * max_len + token_index)
57 | else:
58 | # keep the original token (remaining ~10%) but still record it as a target
59 | if token != SEP and token != CLS:
60 | mask_label.append(sent[token_index])
61 | mask_pos.append(sent_index * max_len + token_index)
62 | pre_sent_len = len(sent)
63 |
64 | # ensure that at least one token in each sentence is masked
65 | while not mask_flag:
66 | token_index = int(np.random.randint(1, high=len(sent) - 1, size=1))
67 | if sent[token_index] != SEP and sent[token_index] != CLS:
68 | mask_label.append(sent[token_index])
69 | sent[token_index] = MASK
70 | mask_flag = True
71 | mask_pos.append(sent_index * max_len + token_index)
72 | mask_label = np.array(mask_label).astype("int64").reshape([-1, 1])
73 | mask_pos = np.array(mask_pos).astype("int64").reshape([-1, 1])
74 | return batch_tokens, mask_label, mask_pos
75 |
76 |
77 | def prepare_batch_data(task_name,
78 | insts,
79 | max_len,
80 | total_token_num,
81 | voc_size=0,
82 | pad_id=None,
83 | cls_id=None,
84 | sep_id=None,
85 | mask_id=None,
86 | return_input_mask=True,
87 | return_max_len=True,
88 | return_num_token=False):
89 | """
90 | 1. generate Tensor of data
91 | 2. generate Tensor of position
92 | 3. generate self attention mask, [shape: batch_size * max_len * max_len]
93 | """
94 | batch_src_ids = [inst[0] for inst in insts]
95 | batch_sent_ids = [inst[1] for inst in insts]
96 | batch_pos_ids = [inst[2] for inst in insts]
97 | labels_list = []
98 | # compatible with SQuAD, whose examples include start/end positions
99 | # or a unique id
100 |
101 | if isinstance(insts[0][3], list):
102 | if task_name == "atis_slot":
103 | labels_list = [inst[3] + [0] * (max_len - len(inst[3])) for inst in insts]
104 | labels_list = [np.array(labels_list).astype("int64").reshape([-1, max_len])]
105 | elif task_name == "dstc2":
106 | labels_list = [inst[3] for inst in insts]
107 | labels_list = [np.array(labels_list).astype("int64")]
108 | else:
109 | for i in range(3, len(insts[0]), 1):
110 | labels = [inst[i] for inst in insts]
111 | labels = np.array(labels).astype("int64").reshape([-1, 1])
112 | labels_list.append(labels)
113 |
114 | # First step: do mask without padding
115 | if mask_id >= 0:
116 | out, mask_label, mask_pos = mask(
117 | batch_src_ids,
118 | total_token_num,
119 | vocab_size=voc_size,
120 | CLS=cls_id,
121 | SEP=sep_id,
122 | MASK=mask_id)
123 | else:
124 | out = batch_src_ids
125 | # Second step: padding
126 | src_id, self_input_mask = pad_batch_data(
127 | out,
128 | max_len,
129 | pad_idx=pad_id,
130 | return_input_mask=True)
131 | pos_id = pad_batch_data(
132 | batch_pos_ids,
133 | max_len,
134 | pad_idx=pad_id,
135 | return_pos=False,
136 | return_input_mask=False)
137 | sent_id = pad_batch_data(
138 | batch_sent_ids,
139 | max_len,
140 | pad_idx=pad_id,
141 | return_pos=False,
142 | return_input_mask=False)
143 |
144 | if mask_id >= 0:
145 | return_list = [
146 | src_id, pos_id, sent_id, self_input_mask, mask_label, mask_pos
147 | ] + labels_list
148 | else:
149 | return_list = [src_id, pos_id, sent_id, self_input_mask] + labels_list
150 |
151 | return return_list if len(return_list) > 1 else return_list[0]
152 |
153 |
154 | def pad_batch_data(insts,
155 | max_len_in,
156 | pad_idx=0,
157 | return_pos=False,
158 | return_input_mask=False,
159 | return_max_len=False,
160 | return_num_token=False):
161 | """
162 | Pad the instances to the max sequence length in batch, and generate the
163 | corresponding position data and attention bias.
164 | """
165 | return_list = []
166 | max_len = max_len_in if max_len_in != -1 else max(len(inst) for inst in insts)
167 | # Any token in the vocabulary can be used for padding, since the padding
168 | # positions' loss is masked out by weights and has no effect on gradients.
169 |
170 | inst_data = np.array(
171 | [inst + [pad_idx] * (max_len - len(inst)) for inst in insts
172 | ])
173 | return_list += [inst_data.astype("int64").reshape([-1, max_len])]
174 |
175 | # position data
176 | if return_pos:
177 | inst_pos = np.array([
178 | list(range(0, len(inst))) + [pad_idx] * (max_len - len(inst))
179 | for inst in insts
180 | ])
181 |
182 | return_list += [inst_pos.astype("int64").reshape([-1, max_len])]
183 |
184 | if return_input_mask:
185 | # This is used to avoid attention on paddings.
186 | input_mask_data = np.array([[1] * len(inst) + [0] *
187 | (max_len - len(inst)) for inst in insts])
188 | input_mask_data = np.expand_dims(input_mask_data, axis=-1)
189 | return_list += [input_mask_data.astype("float32")]
190 |
191 | if return_max_len:
192 | return_list += [max_len]
193 |
194 | if return_num_token:
195 | num_token = 0
196 | for inst in insts:
197 | num_token += len(inst)
198 | return_list += [num_token]
199 |
200 | return return_list if len(return_list) > 1 else return_list[0]
201 |
202 |
203 | if __name__ == "__main__":
204 | pass
205 |
--------------------------------------------------------------------------------
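For reference, a minimal usage sketch of pad_batch_data defined above. The import path mirrors this file's location (dgu/batching.py); the token ids are made up:

from dgu.batching import pad_batch_data

insts = [[101, 7, 8, 102], [101, 9, 102]]   # two variable-length token-id lists
src_id, input_mask = pad_batch_data(insts, max_len_in=5, pad_idx=0,
                                    return_input_mask=True)
print(src_id)      # shape (2, 5), int64: rows right-padded with pad_idx
print(input_mask)  # shape (2, 5, 1), float32: 1.0 over tokens, 0.0 over padding
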
/DGU/dgu/scripts/build_swda_dataset.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """build swda train dev test dataset"""
16 |
17 | import sys
18 | import csv
19 | import os
20 | import io
21 | import re
22 |
23 | import commonlib
24 |
25 |
26 | class SWDA(object):
27 | """
28 | Data processing for the SWDA dialogue-act dataset.
29 | """
30 | def __init__(self):
31 | """
32 | init instance
33 | """
34 | self.tag_id = 0
35 | self.map_tag_dict = dict()
36 | self.out_dir = "../../data/input/data/swda"
37 | self.data_list = "./conf/swda.conf"
38 | self.map_tag = "../../data/input/data/swda/map_tag_id.txt"
39 | self.src_dir = "../../data/input/data/swda/source_data/swda"
40 | self._load_file()
41 |
42 | def _load_file(self):
43 | """
44 | Load dataset filenames.
45 | """
46 | self.data_dict = commonlib.load_dict(self.data_list)
47 | self.file_dict = {}
48 | child_dir = commonlib.get_dir_list(self.src_dir)
49 | for chd in child_dir:
50 | file_list, file_path = commonlib.get_file_list(chd)
51 | for i in range(len(file_list)):
52 | name = file_list[i]
53 | keyword = "sw%s" % name.split('.')[0].split('_')[-1]
54 | self.file_dict[keyword] = file_path[i]
55 |
56 | def _parser_dataset(self, data_type):
57 | """
58 | Parse one of the train/dev/test splits and write <data_type>.txt.
59 | """
60 | out_filename = "%s/%s.txt" % (self.out_dir, data_type)
61 | fw = io.open(out_filename, 'w', encoding='utf8')
62 | for name in self.data_dict[data_type]:
63 | file_path = self.file_dict[name]
64 | fr = io.open(file_path, 'r', encoding="utf8")
65 | idx = 0
66 | row = csv.reader(fr, delimiter=',')
67 | for r in row:
68 | if idx == 0:
69 | idx += 1
70 | continue
71 | out = self._parser_utterence(r)
72 | fw.write(u"%s\n" % out)
73 |
74 | def _clean_text(self, text):
75 | """
76 | text cleaning for dialogue act dataset
77 | """
78 | if text.startswith('<') and text.endswith('>.'):
79 | return text
80 | if "[" in text or "]" in text:
81 | stat = True
82 | else:
83 | stat = False
84 | group = re.findall(r"\[.*?\+.*?\]", text)
85 | while group and stat:
86 | for elem in group:
87 | elem_src = elem
88 | elem = re.sub(r'\+', '', elem.lstrip('[').rstrip(']'))
89 | text = text.replace(elem_src, elem)
90 | if "[" in text or "]" in text:
91 | stat = True
92 | else:
93 | stat = False
94 | group = re.findall(r"\[.*?\+.*?\]", text)
95 | if "{" in text or "}" in text:
96 | stat = True
97 | else:
98 | stat = False
99 | group = re.findall(r"{[A-Z].*?}", text)
100 | while group and stat:
101 | child_group = re.findall(r"{[A-Z]*(.*?)}", text)
102 | for i in range(len(group)):
103 | text = text.replace(group[i], child_group[i])
104 | if "{" in text or "}" in text:
105 | stat = True
106 | else:
107 | stat = False
108 | group = re.findall(r"{[A-Z].*?}", text)
109 | if "(" in text or ")" in text:
110 | stat = True
111 | else:
112 | stat = False
113 | group = re.findall(r"\(\(.*?\)\)", text)
114 | while group and stat:
115 | for elem in group:
116 | if elem:
117 | elem_clean = re.sub(r"\(|\)", "", elem)
118 | text = text.replace(elem, elem_clean)
119 | else:
120 | text = text.replace(elem, "mumblex")
121 | if "(" in text or ")" in text:
122 | stat = True
123 | else:
124 | stat = False
125 | group = re.findall(r"\(\((.*?)\)\)", text)
126 |
127 | group = re.findall(r"\<.*?\>", text)
128 | if group:
129 | for elem in group:
130 | text = text.replace(elem, "")
131 |
132 | text = re.sub(r" \'s", "\'s", text)
133 | text = re.sub(r" n\'t", "n\'t", text)
134 | text = re.sub(r" \'t", "\'t", text)
135 | text = re.sub(" +", " ", text)
136 | text = text.rstrip('\/').strip().strip('-')
137 | text = re.sub(r"\[|\]|\+|\>|\<|\{|\}", "", text)
138 | return text.strip().lower()
139 |
140 | def _map_tag(self, da_tag):
141 | """
142 | Map a raw dialogue-act tag to one of the 42 collapsed classes.
143 | """
144 | curr_da_tags = []
145 | curr_das = re.split(r"\s*[,;]\s*", da_tag)
146 | for curr_da in curr_das:
147 | if curr_da == "qy_d" or curr_da == "qw^d" or curr_da == "b^m":
148 | pass
149 | elif curr_da == "nn^e":
150 | curr_da = "ng"
151 | elif curr_da == "ny^e":
152 | curr_da = "na"
153 | else:
154 | curr_da = re.sub(r'(.)\^.*', r'\1', curr_da)
155 | curr_da = re.sub(r'[\(\)@*]', '', curr_da)
156 | tag = curr_da
157 | if tag in ('qr', 'qy'):
158 | tag = 'qy'
159 | elif tag in ('fe', 'ba'):
160 | tag = 'ba'
161 | elif tag in ('oo', 'co', 'cc'):
162 | tag = 'oo_co_cc'
163 | elif tag in ('fx', 'sv'):
164 | tag = 'sv'
165 | elif tag in ('aap', 'am'):
166 | tag = 'aap_am'
167 | elif tag in ('arp', 'nd'):
168 | tag = 'arp_nd'
169 | elif tag in ('fo', 'o', 'fw', '"', 'by', 'bc'):
170 | tag = 'fo_o_fw_"_by_bc'
171 | curr_da = tag
172 | curr_da_tags.append(curr_da)
173 | if curr_da_tags[0] not in self.map_tag_dict:
174 | self.map_tag_dict[curr_da_tags[0]] = self.tag_id
175 | self.tag_id += 1
176 | return self.map_tag_dict[curr_da_tags[0]]
177 |
178 | def _parser_utterence(self, line):
179 | """
180 | Parse one dialogue turn.
181 | """
182 | conversation_no = line[2]
183 | act_tag = line[4]
184 | caller = line[5]
185 | text = line[8]
186 | text = self._clean_text(text)
187 | act_tag = self._map_tag(act_tag)
188 |
189 | out = "%s\t%s\t%s\t%s" % (conversation_no, act_tag, caller, text)
190 | return out
191 |
192 | def get_train_dataset(self):
193 | """
194 | Parse the train split and write train.txt.
195 | """
196 | self._parser_dataset("train")
197 |
198 | def get_dev_dataset(self):
199 | """
200 | Parse the dev split and write dev.txt.
201 | """
202 | self._parser_dataset("dev")
203 |
204 | def get_test_dataset(self):
205 | """
206 | Parse the test split and write test.txt.
207 | """
208 | self._parser_dataset("test")
209 |
210 | def get_labels(self):
211 | """
212 | Write the tag-to-id mapping file.
213 | """
214 | fw = io.open(self.map_tag, 'w', encoding='utf8')
215 | for elem in self.map_tag_dict:
216 | fw.write(u"%s\t%s\n" % (elem, self.map_tag_dict[elem]))
217 |
218 | def main(self):
219 | """
220 | Run the full data-processing pipeline.
221 | """
222 | self.get_train_dataset()
223 | self.get_dev_dataset()
224 | self.get_test_dataset()
225 | self.get_labels()
226 |
227 | if __name__ == "__main__":
228 | swda_inst = SWDA()
229 | swda_inst.main()
230 |
231 |
232 |
233 |
234 |
--------------------------------------------------------------------------------
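To make the tag-collapsing in SWDA._map_tag above concrete, here is a standalone re-implementation of a few of its branches (same regexes, minus the tag-id bookkeeping); it is a sketch for illustration, not part of the repo:

import re

def collapse_tag(curr_da):
    # mirrors the per-tag normalization inside SWDA._map_tag
    if curr_da in ("qy_d", "qw^d", "b^m"):
        return curr_da                                # kept verbatim
    if curr_da == "nn^e":
        return "ng"
    if curr_da == "ny^e":
        return "na"
    curr_da = re.sub(r'(.)\^.*', r'\1', curr_da)      # strip '^...' suffixes
    curr_da = re.sub(r'[\(\)@*]', '', curr_da)        # drop (), @ and *
    return {'qr': 'qy', 'fe': 'ba', 'fx': 'sv'}.get(curr_da, curr_da)

print(collapse_tag("sd^e"))   # 'sd'
print(collapse_tag("qr"))     # 'qy'
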
/DAM/models/cross_match_net.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | import cPickle as pickle  # Python 2; under Python 3 use the stdlib pickle
4 |
5 | import utils.layers as layers
6 | import utils.operations as op
7 |
8 | class Net(object):
9 | '''Add positional encoding (initializer lambda is 0),
10 | cross-attention, cnn integrated and grad clip by value.
11 |
12 | Attributes:
13 | conf: a dict of configuration parameters
14 | word_embedding_init: a 2-d array with shape [vocab_size+1, emb_size]
15 | '''
16 | def __init__(self, conf):
17 | self._graph = tf.Graph()
18 | self._conf = conf
19 |
20 | if self._conf['word_emb_init'] is not None:
21 | print('loading word emb init')
22 | self._word_embedding_init = pickle.load(open(self._conf['word_emb_init'], 'rb'))
23 | else:
24 | self._word_embedding_init = None
25 |
26 | def build_graph(self):
27 | with self._graph.as_default():
28 | rand_seed = self._conf['rand_seed']
29 | tf.set_random_seed(rand_seed)
30 |
31 | #word embedding
32 | if self._word_embedding_init is not None:
33 | word_embedding_initializer = tf.constant_initializer(self._word_embedding_init)
34 | else:
35 | word_embedding_initializer = tf.random_normal_initializer(stddev=0.1)
36 |
37 | self._word_embedding = tf.get_variable(
38 | name='word_embedding',
39 | shape=[self._conf['vocab_size']+1, self._conf['emb_size']],
40 | dtype=tf.float32,
41 | initializer=word_embedding_initializer)
42 |
43 |
44 | #define placeholders
45 | self.turns = tf.placeholder(
46 | tf.int32,
47 | shape=[self._conf["batch_size"], self._conf["max_turn_num"], self._conf["max_turn_len"]])
48 |
49 | self.tt_turns_len = tf.placeholder(
50 | tf.int32,
51 | shape=[self._conf["batch_size"]])
52 |
53 | self.every_turn_len = tf.placeholder(
54 | tf.int32,
55 | shape=[self._conf["batch_size"], self._conf["max_turn_num"]])
56 |
57 | self.response = tf.placeholder(
58 | tf.int32,
59 | shape=[self._conf["batch_size"], self._conf["max_turn_len"]])
60 |
61 | self.response_len = tf.placeholder(
62 | tf.int32,
63 | shape=[self._conf["batch_size"]])
64 |
65 | self.label = tf.placeholder(
66 | tf.float32,
67 | shape=[self._conf["batch_size"]])
68 |
69 |
70 | #define operations
71 | #response part
72 | Hr = tf.nn.embedding_lookup(self._word_embedding, self.response)
73 |
74 | if self._conf['is_positional'] and self._conf['stack_num'] > 0:
75 | with tf.variable_scope('positional'):
76 | Hr = op.positional_encoding_vector(Hr, max_timescale=10)
77 | Hr_stack = [Hr]
78 |
79 | for index in range(self._conf['stack_num']):
80 | with tf.variable_scope('self_stack_' + str(index)):
81 | Hr = layers.block(
82 | Hr, Hr, Hr,
83 | Q_lengths=self.response_len, K_lengths=self.response_len)
84 | Hr_stack.append(Hr)
85 |
86 |
87 | #context part
88 | #a list of length max_turn_num, every element is a tensor with shape [batch, max_turn_len]
89 | list_turn_t = tf.unstack(self.turns, axis=1)
90 | list_turn_length = tf.unstack(self.every_turn_len, axis=1)
91 |
92 | sim_turns = []
93 | #for every turn_t calculate matching vector
94 | for turn_t, t_turn_length in zip(list_turn_t, list_turn_length):
95 | Hu = tf.nn.embedding_lookup(self._word_embedding, turn_t) #[batch, max_turn_len, emb_size]
96 |
97 | if self._conf['is_positional'] and self._conf['stack_num'] > 0:
98 | with tf.variable_scope('positional', reuse=True):
99 | Hu = op.positional_encoding_vector(Hu, max_timescale=10)
100 | Hu_stack = [Hu]
101 |
102 | for index in range(self._conf['stack_num']):
103 |
104 | with tf.variable_scope('self_stack_' + str(index), reuse=True):
105 | Hu = layers.block(
106 | Hu, Hu, Hu,
107 | Q_lengths=t_turn_length, K_lengths=t_turn_length)
108 |
109 | Hu_stack.append(Hu)
110 |
111 |
112 |
113 | r_a_t_stack = []
114 | t_a_r_stack = []
115 | for index in range(self._conf['stack_num']+1):
116 |
117 | with tf.variable_scope('t_attend_r_' + str(index)):
118 | try:
119 | t_a_r = layers.block(
120 | Hu_stack[index], Hr_stack[index], Hr_stack[index],
121 | Q_lengths=t_turn_length, K_lengths=self.response_len)
122 | except ValueError:
123 | tf.get_variable_scope().reuse_variables()
124 | t_a_r = layers.block(
125 | Hu_stack[index], Hr_stack[index], Hr_stack[index],
126 | Q_lengths=t_turn_length, K_lengths=self.response_len)
127 |
128 |
129 | with tf.variable_scope('r_attend_t_' + str(index)):
130 | try:
131 | r_a_t = layers.block(
132 | Hr_stack[index], Hu_stack[index], Hu_stack[index],
133 | Q_lengths=self.response_len, K_lengths=t_turn_length)
134 | except ValueError:
135 | tf.get_variable_scope().reuse_variables()
136 | r_a_t = layers.block(
137 | Hr_stack[index], Hu_stack[index], Hu_stack[index],
138 | Q_lengths=self.response_len, K_lengths=t_turn_length)
139 |
140 | t_a_r_stack.append(t_a_r)
141 | r_a_t_stack.append(r_a_t)
142 |
143 |
144 | t_a_r = tf.stack(t_a_r_stack, axis=-1)
145 | r_a_t = tf.stack(r_a_t_stack, axis=-1)
146 |
147 |
148 | #calculate similarity matrix
149 | with tf.variable_scope('similarity'):
150 | # sim shape [batch, max_turn_len, max_turn_len, stack_num+1]
151 | # divide by sqrt(200) to prevent gradient explosion
152 | sim = tf.einsum('biks,bjks->bijs', t_a_r, r_a_t) / tf.sqrt(200.0)
153 |
154 | sim_turns.append(sim)
155 |
156 |
157 | #cnn and aggregation
158 | sim = tf.stack(sim_turns, axis=1)
159 | print('sim shape: %s' %sim.shape)
160 | with tf.variable_scope('cnn_aggregation'):
161 | final_info = layers.CNN_3d(sim, 32, 16)
162 | #for douban
163 | #final_info = layers.CNN_3d(sim, 16, 16)
164 |
165 |
166 | #loss and train
167 | with tf.variable_scope('loss'):
168 | self.loss, self.logits = layers.loss(final_info, self.label)
169 |
170 | self.global_step = tf.Variable(0, trainable=False)
171 | initial_learning_rate = self._conf['learning_rate']
172 | self.learning_rate = tf.train.exponential_decay(
173 | initial_learning_rate,
174 | global_step=self.global_step,
175 | decay_steps=400,
176 | decay_rate=0.9,
177 | staircase=True)
178 |
179 | Optimizer = tf.train.AdamOptimizer(self.learning_rate)
180 | self.optimizer = Optimizer.minimize(
181 | self.loss,
182 | global_step=self.global_step)
183 |
184 | self.init = tf.global_variables_initializer()
185 | self.saver = tf.train.Saver(max_to_keep=self._conf["max_to_keep"])
186 | self.all_variables = tf.global_variables()
187 | self.all_operations = self._graph.get_operations()
188 | self.grads_and_vars = Optimizer.compute_gradients(self.loss)
189 |
190 | for grad, var in self.grads_and_vars:
191 | if grad is None:
192 | print(var)  # flag any variable that received no gradient
193 |
194 | self.capped_gvs = [(tf.clip_by_value(grad, -1, 1), var) for grad, var in self.grads_and_vars]
195 | self.g_updates = Optimizer.apply_gradients(
196 | self.capped_gvs,
197 | global_step=self.global_step)
198 |
199 | return self._graph
200 |
201 |
--------------------------------------------------------------------------------
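The similarity step above contracts the embedding axis separately for every attention channel: sim[b, i, j, s] = sum_k t_a_r[b, i, k, s] * r_a_t[b, j, k, s], scaled by 1/sqrt(200) (200 presumably being the embedding size). A numpy equivalent with made-up shapes:

import numpy as np

batch, turn_len, emb, channels = 2, 4, 200, 6
t_a_r = np.random.randn(batch, turn_len, emb, channels)
r_a_t = np.random.randn(batch, turn_len, emb, channels)

sim = np.einsum('biks,bjks->bijs', t_a_r, r_a_t) / np.sqrt(200.0)
print(sim.shape)   # (2, 4, 4, 6): [batch, max_turn_len, max_turn_len, channels]
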
/DAM/models/net.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | import cPickle as pickle  # Python 2; under Python 3 use the stdlib pickle
4 |
5 | import utils.layers as layers
6 | import utils.operations as op
7 |
8 | class Net(object):
9 | '''Add positional encoding (initializer lambda is 0),
10 | cross-attention, cnn integrated and grad clip by value.
11 |
12 | Attributes:
13 | conf: a dict of configuration parameters
14 | word_embedding_init: a 2-d array with shape [vocab_size+1, emb_size]
15 | '''
16 | def __init__(self, conf):
17 | self._graph = tf.Graph()
18 | self._conf = conf
19 |
20 | if self._conf['word_emb_init'] is not None:
21 | print('loading word emb init')
22 | self._word_embedding_init = pickle.load(open(self._conf['word_emb_init'], 'rb'))
23 | else:
24 | self._word_embedding_init = None
25 |
26 | def build_graph(self):
27 | with self._graph.as_default():
28 | if self._conf['rand_seed'] is not None:
29 | rand_seed = self._conf['rand_seed']
30 | tf.set_random_seed(rand_seed)
31 | print('set tf random seed: %s' %self._conf['rand_seed'])
32 |
33 | #word embedding
34 | if self._word_embedding_init is not None:
35 | word_embedding_initializer = tf.constant_initializer(self._word_embedding_init)
36 | else:
37 | word_embedding_initializer = tf.random_normal_initializer(stddev=0.1)
38 |
39 | self._word_embedding = tf.get_variable(
40 | name='word_embedding',
41 | shape=[self._conf['vocab_size']+1, self._conf['emb_size']],
42 | dtype=tf.float32,
43 | initializer=word_embedding_initializer)
44 |
45 |
46 | #define placeholders
47 | self.turns = tf.placeholder(
48 | tf.int32,
49 | shape=[self._conf["batch_size"], self._conf["max_turn_num"], self._conf["max_turn_len"]])
50 |
51 | self.tt_turns_len = tf.placeholder(
52 | tf.int32,
53 | shape=[self._conf["batch_size"]])
54 |
55 | self.every_turn_len = tf.placeholder(
56 | tf.int32,
57 | shape=[self._conf["batch_size"], self._conf["max_turn_num"]])
58 |
59 | self.response = tf.placeholder(
60 | tf.int32,
61 | shape=[self._conf["batch_size"], self._conf["max_turn_len"]])
62 |
63 | self.response_len = tf.placeholder(
64 | tf.int32,
65 | shape=[self._conf["batch_size"]])
66 |
67 | self.label = tf.placeholder(
68 | tf.float32,
69 | shape=[self._conf["batch_size"]])
70 |
71 |
72 | #define operations
73 | #response part
74 | Hr = tf.nn.embedding_lookup(self._word_embedding, self.response)
75 |
76 | if self._conf['is_positional'] and self._conf['stack_num'] > 0:
77 | with tf.variable_scope('positional'):
78 | Hr = op.positional_encoding_vector(Hr, max_timescale=10)
79 | Hr_stack = [Hr]
80 |
81 | for index in range(self._conf['stack_num']):
82 | with tf.variable_scope('self_stack_' + str(index)):
83 | Hr = layers.block(
84 | Hr, Hr, Hr,
85 | Q_lengths=self.response_len, K_lengths=self.response_len)
86 | Hr_stack.append(Hr)
87 |
88 |
89 | #context part
90 | #a list of length max_turn_num, every element is a tensor with shape [batch, max_turn_len]
91 | list_turn_t = tf.unstack(self.turns, axis=1)
92 | list_turn_length = tf.unstack(self.every_turn_len, axis=1)
93 |
94 | sim_turns = []
95 | #for every turn_t calculate matching vector
96 | for turn_t, t_turn_length in zip(list_turn_t, list_turn_length):
97 | Hu = tf.nn.embedding_lookup(self._word_embedding, turn_t) #[batch, max_turn_len, emb_size]
98 |
99 | if self._conf['is_positional'] and self._conf['stack_num'] > 0:
100 | with tf.variable_scope('positional', reuse=True):
101 | Hu = op.positional_encoding_vector(Hu, max_timescale=10)
102 | Hu_stack = [Hu]
103 |
104 | for index in range(self._conf['stack_num']):
105 |
106 | with tf.variable_scope('self_stack_' + str(index), reuse=True):
107 | Hu = layers.block(
108 | Hu, Hu, Hu,
109 | Q_lengths=t_turn_length, K_lengths=t_turn_length)
110 |
111 | Hu_stack.append(Hu)
112 |
113 |
114 |
115 | r_a_t_stack = []
116 | t_a_r_stack = []
117 | for index in range(self._conf['stack_num']+1):
118 |
119 | with tf.variable_scope('t_attend_r_' + str(index)):
120 | try:
121 | t_a_r = layers.block(
122 | Hu_stack[index], Hr_stack[index], Hr_stack[index],
123 | Q_lengths=t_turn_length, K_lengths=self.response_len)
124 | except ValueError:
125 | tf.get_variable_scope().reuse_variables()
126 | t_a_r = layers.block(
127 | Hu_stack[index], Hr_stack[index], Hr_stack[index],
128 | Q_lengths=t_turn_length, K_lengths=self.response_len)
129 |
130 |
131 | with tf.variable_scope('r_attend_t_' + str(index)):
132 | try:
133 | r_a_t = layers.block(
134 | Hr_stack[index], Hu_stack[index], Hu_stack[index],
135 | Q_lengths=self.response_len, K_lengths=t_turn_length)
136 | except ValueError:
137 | tf.get_variable_scope().reuse_variables()
138 | r_a_t = layers.block(
139 | Hr_stack[index], Hu_stack[index], Hu_stack[index],
140 | Q_lengths=self.response_len, K_lengths=t_turn_length)
141 |
142 | t_a_r_stack.append(t_a_r)
143 | r_a_t_stack.append(r_a_t)
144 |
145 | t_a_r_stack.extend(Hu_stack)
146 | r_a_t_stack.extend(Hr_stack)
147 |
148 | t_a_r = tf.stack(t_a_r_stack, axis=-1)
149 | r_a_t = tf.stack(r_a_t_stack, axis=-1)
150 |
151 |
152 | #calculate similarity matrix
153 | with tf.variable_scope('similarity'):
154 | # sim shape [batch, max_turn_len, max_turn_len, 2*(stack_num+1)]
155 | # divide by sqrt(200) to prevent gradient explosion
156 | sim = tf.einsum('biks,bjks->bijs', t_a_r, r_a_t) / tf.sqrt(200.0)
157 |
158 | sim_turns.append(sim)
159 |
160 |
161 | #cnn and aggregation
162 | sim = tf.stack(sim_turns, axis=1)
163 | print('sim shape: %s' %sim.shape)
164 | with tf.variable_scope('cnn_aggregation'):
165 | final_info = layers.CNN_3d(sim, 32, 16)
166 | #for douban
167 | #final_info = layers.CNN_3d(sim, 16, 16)
168 |
169 | #loss and train
170 | with tf.variable_scope('loss'):
171 | self.loss, self.logits = layers.loss(final_info, self.label)
172 |
173 | self.global_step = tf.Variable(0, trainable=False)
174 | initial_learning_rate = self._conf['learning_rate']
175 | self.learning_rate = tf.train.exponential_decay(
176 | initial_learning_rate,
177 | global_step=self.global_step,
178 | decay_steps=400,
179 | decay_rate=0.9,
180 | staircase=True)
181 |
182 | Optimizer = tf.train.AdamOptimizer(self.learning_rate)
183 | self.optimizer = Optimizer.minimize(
184 | self.loss,
185 | global_step=self.global_step)
186 |
187 | self.init = tf.global_variables_initializer()
188 | self.saver = tf.train.Saver(max_to_keep=self._conf["max_to_keep"])
189 | self.all_variables = tf.global_variables()
190 | self.all_operations = self._graph.get_operations()
191 | self.grads_and_vars = Optimizer.compute_gradients(self.loss)
192 |
193 | for grad, var in self.grads_and_vars:
194 | if grad is None:
195 | print(var)  # flag any variable that received no gradient
196 |
197 | self.capped_gvs = [(tf.clip_by_value(grad, -1, 1), var) for grad, var in self.grads_and_vars]
198 | self.g_updates = Optimizer.apply_gradients(
199 | self.capped_gvs,
200 | global_step=self.global_step)
201 |
202 | return self._graph
203 |
204 |
--------------------------------------------------------------------------------
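The try/except ValueError wrapped around layers.block in the models above is the standard TF1 idiom for sharing weights across the per-turn loop: the first turn creates the attention variables, and every later turn trips the "variable already exists" ValueError, which is caught so the scope can be switched into reuse mode. A minimal sketch of the same idiom (TensorFlow 1.x assumed, as everywhere in DAM):

import tensorflow as tf  # TF 1.x

def shared_weight():
    # first call creates 'shared/w'; subsequent calls raise ValueError,
    # which we catch to enable reuse and fetch the same variable again
    with tf.variable_scope('shared'):
        try:
            return tf.get_variable('w', shape=[8, 8])
        except ValueError:
            tf.get_variable_scope().reuse_variables()
            return tf.get_variable('w', shape=[8, 8])

w1, w2 = shared_weight(), shared_weight()
assert w1 is w2   # both calls resolve to the same underlying variable
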
/DGU/dgu/bert.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """BERT model."""
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | import os
22 | import io
23 | import sys
24 | import six
25 | import json
26 | import numpy as np
27 | import paddle.fluid as fluid
28 |
29 | from dgu.transformer_encoder import encoder, pre_process_layer
30 |
31 |
32 | class BertConfig(object):
33 | def __init__(self, config_path):
34 | self._config_dict = self._parse(config_path)
35 |
36 | def _parse(self, config_path):
37 | try:
38 | json_file = io.open(config_path, 'r', encoding="utf8")
39 | config_dict = json.load(json_file)
40 | except Exception:
41 | raise IOError("Error in parsing bert model config file '%s'" %
42 | config_path)
43 | else:
44 | return config_dict
45 |
46 | def __getitem__(self, key):
47 | return self._config_dict[key]
48 |
49 | def print_config(self):
50 | for arg, value in sorted(six.iteritems(self._config_dict)):
51 | print('%s: %s' % (arg, value))
52 | print('------------------------------------------------')
53 |
54 |
55 | class BertModel(object):
56 | def __init__(self,
57 | src_ids,
58 | position_ids,
59 | sentence_ids,
60 | input_mask,
61 | config,
62 | weight_sharing=True,
63 | use_fp16=False):
64 |
65 | self._emb_size = config['hidden_size']
66 | self._n_layer = config['num_hidden_layers']
67 | self._n_head = config['num_attention_heads']
68 | self._voc_size = config['vocab_size']
69 | self._max_position_seq_len = config['max_position_embeddings']
70 | self._sent_types = config['type_vocab_size']
71 | self._hidden_act = config['hidden_act']
72 | self._prepostprocess_dropout = config['hidden_dropout_prob']
73 | self._attention_dropout = config['attention_probs_dropout_prob']
74 | self._weight_sharing = weight_sharing
75 |
76 | self._word_emb_name = "word_embedding"
77 | self._pos_emb_name = "pos_embedding"
78 | self._sent_emb_name = "sent_embedding"
79 | self._dtype = "float16" if use_fp16 else "float32"
80 |
81 | # Initialize all weights with a truncated normal initializer; all biases
82 | # are initialized to constant zero by default.
83 | self._param_initializer = fluid.initializer.TruncatedNormal(
84 | scale=config['initializer_range'])
85 |
86 | self._build_model(src_ids, position_ids, sentence_ids, input_mask)
87 |
88 | def _build_model(self, src_ids, position_ids, sentence_ids, input_mask):
89 | # padding id in vocabulary must be set to 0
90 | emb_out = fluid.input.embedding(
91 | input=src_ids,
92 | size=[self._voc_size, self._emb_size],
93 | dtype=self._dtype,
94 | param_attr=fluid.ParamAttr(
95 | name=self._word_emb_name, initializer=self._param_initializer),
96 | is_sparse=False)
97 | position_emb_out = fluid.input.embedding(
98 | input=position_ids,
99 | size=[self._max_position_seq_len, self._emb_size],
100 | dtype=self._dtype,
101 | param_attr=fluid.ParamAttr(
102 | name=self._pos_emb_name, initializer=self._param_initializer))
103 |
104 | sent_emb_out = fluid.input.embedding(
105 | sentence_ids,
106 | size=[self._sent_types, self._emb_size],
107 | dtype=self._dtype,
108 | param_attr=fluid.ParamAttr(
109 | name=self._sent_emb_name, initializer=self._param_initializer))
110 |
111 | emb_out = emb_out + position_emb_out
112 | emb_out = emb_out + sent_emb_out
113 |
114 | emb_out = pre_process_layer(
115 | emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder')
116 |
117 | if self._dtype == "float16":
118 | input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype)
119 |
120 | self_attn_mask = fluid.layers.matmul(
121 | x=input_mask, y=input_mask, transpose_y=True)
122 | self_attn_mask = fluid.layers.scale(
123 | x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False)
124 | n_head_self_attn_mask = fluid.layers.stack(
125 | x=[self_attn_mask] * self._n_head, axis=1)
126 | n_head_self_attn_mask.stop_gradient = True
127 |
128 | self._enc_out = encoder(
129 | enc_input=emb_out,
130 | attn_bias=n_head_self_attn_mask,
131 | n_layer=self._n_layer,
132 | n_head=self._n_head,
133 | d_key=self._emb_size // self._n_head,
134 | d_value=self._emb_size // self._n_head,
135 | d_model=self._emb_size,
136 | d_inner_hid=self._emb_size * 4,
137 | prepostprocess_dropout=self._prepostprocess_dropout,
138 | attention_dropout=self._attention_dropout,
139 | relu_dropout=0,
140 | hidden_act=self._hidden_act,
141 | preprocess_cmd="",
142 | postprocess_cmd="dan",
143 | param_initializer=self._param_initializer,
144 | name='encoder')
145 |
146 | def get_sequence_output(self):
147 | return self._enc_out
148 |
149 | def get_pooled_output(self):
150 | """Get the first feature of each sequence for classification"""
151 |
152 | next_sent_feat = fluid.layers.slice(
153 | input=self._enc_out, axes=[1], starts=[0], ends=[1])
154 | next_sent_feat = fluid.layers.fc(
155 | input=next_sent_feat,
156 | size=self._emb_size,
157 | act="tanh",
158 | param_attr=fluid.ParamAttr(
159 | name="pooled_fc.w_0", initializer=self._param_initializer),
160 | bias_attr="pooled_fc.b_0")
161 | return next_sent_feat
162 |
163 | def get_pretraining_output(self, mask_label, mask_pos, labels):
164 | """Get the loss & accuracy for pretraining"""
165 |
166 | mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32')
167 |
168 | # extract the first token feature in each sentence
169 | next_sent_feat = self.get_pooled_output()
170 | reshaped_emb_out = fluid.layers.reshape(
171 | x=self._enc_out, shape=[-1, self._emb_size])
172 | # extract masked tokens' feature
173 | mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos)
174 |
175 | # transform: fc
176 | mask_trans_feat = fluid.layers.fc(
177 | input=mask_feat,
178 | size=self._emb_size,
179 | act=self._hidden_act,
180 | param_attr=fluid.ParamAttr(
181 | name='mask_lm_trans_fc.w_0',
182 | initializer=self._param_initializer),
183 | bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0'))
184 | # transform: layer norm
185 | mask_trans_feat = pre_process_layer(
186 | mask_trans_feat, 'n', name='mask_lm_trans')
187 |
188 | mask_lm_out_bias_attr = fluid.ParamAttr(
189 | name="mask_lm_out_fc.b_0",
190 | initializer=fluid.initializer.Constant(value=0.0))
191 | if self._weight_sharing:
192 | fc_out = fluid.layers.matmul(
193 | x=mask_trans_feat,
194 | y=fluid.default_main_program().global_block().var(
195 | self._word_emb_name),
196 | transpose_y=True)
197 | fc_out += fluid.layers.create_parameter(
198 | shape=[self._voc_size],
199 | dtype=self._dtype,
200 | attr=mask_lm_out_bias_attr,
201 | is_bias=True)
202 |
203 | else:
204 | fc_out = fluid.layers.fc(input=mask_trans_feat,
205 | size=self._voc_size,
206 | param_attr=fluid.ParamAttr(
207 | name="mask_lm_out_fc.w_0",
208 | initializer=self._param_initializer),
209 | bias_attr=mask_lm_out_bias_attr)
210 |
211 | mask_lm_loss = fluid.layers.softmax_with_cross_entropy(
212 | logits=fc_out, label=mask_label)
213 | mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss)
214 |
215 | next_sent_fc_out = fluid.layers.fc(
216 | input=next_sent_feat,
217 | size=2,
218 | param_attr=fluid.ParamAttr(
219 | name="next_sent_fc.w_0", initializer=self._param_initializer),
220 | bias_attr="next_sent_fc.b_0")
221 |
222 | next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy(
223 | logits=next_sent_fc_out, label=labels, return_softmax=True)
224 |
225 | next_sent_acc = fluid.layers.accuracy(
226 | input=next_sent_softmax, label=labels)
227 |
228 | mean_next_sent_loss = fluid.layers.mean(next_sent_loss)
229 |
230 | loss = mean_next_sent_loss + mean_mask_lm_loss
231 | return next_sent_acc, mean_mask_lm_loss, loss
232 |
--------------------------------------------------------------------------------
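One arithmetic detail worth spelling out in _build_model above: with bias_after_scale=False, fluid.layers.scale computes scale * (x + bias), so the {0, 1} visibility matrix matmul(input_mask, input_mask^T) becomes an additive attention bias of 0 for real-token pairs and -10000 wherever padding is involved. A numpy check:

import numpy as np

input_mask = np.array([[[1.0], [1.0], [0.0]]])    # [batch, seq_len, 1]; last slot is padding
visible = np.matmul(input_mask, input_mask.transpose(0, 2, 1))
attn_bias = 10000.0 * (visible - 1.0)             # scale * (x + bias) with bias = -1.0
print(attn_bias[0])
# [[     0.      0. -10000.]
#  [     0.      0. -10000.]
#  [-10000. -10000. -10000.]]
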