├── ADE
    ├── ade
    │   ├── __init__.py
    │   ├── utils
    │   │   ├── __init__.py
    │   │   ├── input_field.py
    │   │   ├── model_check.py
    │   │   └── save_load_io.py
    │   ├── evaluate.py
    │   ├── prepare_data_and_model.py
    │   └── reader.py
    ├── data
    │   ├── input
    │   │   └── input.md
    │   ├── output
    │   │   └── output.md
    │   ├── inference_models
    │   │   └── inference_models.md
    │   ├── saved_models
    │   │   └── saved_models.md
    │   ├── pretrain_model
    │   │   └── pretrain_model.md
    │   └── config
    │   │   └── ade.yaml
    ├── main.py
    ├── .run_ce.sh
    ├── _ce.py
    ├── eval.py
    ├── inference_model.py
    ├── ade_net.py
    ├── predict.py
    ├── run.sh
    └── train.py
├── DAM
    ├── bin
    │   ├── __init__.py
    │   ├── test_and_evaluate.py
    │   └── train_and_evaluate.py
    ├── models
    │   ├── __init__.py
    │   ├── self_match_net.py
    │   ├── last_net.py
    │   ├── cross_match_net.py
    │   └── net.py
    ├── utils
    │   ├── __init__.py
    │   ├── evaluation.py
    │   ├── douban_evaluation.py
    │   └── reader.py
    ├── log
    │   └── ReadMe.txt
    ├── output
    │   └── ReadMe.txt
    ├── run.sh
    ├── appendix
    │   ├── Figure1.png
    │   └── Figure2.png
    ├── data
    │   └── ReadMe.txt
    ├── main.py
    └── README.md
├── DGU
    ├── dgu
    │   ├── __init__.py
    │   ├── utils
    │   │   ├── __init__.py
    │   │   ├── py23.py
    │   │   ├── input_field.py
    │   │   ├── model_check.py
    │   │   ├── fp16.py
    │   │   └── save_load_io.py
    │   ├── scripts
    │   │   ├── README.md
    │   │   ├── conf
    │   │   │   └── mrda.conf
    │   │   ├── run_build_data.py
    │   │   ├── commonlib.py
    │   │   ├── build_mrda_dataset.py
    │   │   ├── build_dstc2_dataset.py
    │   │   ├── build_atis_dataset.py
    │   │   └── build_swda_dataset.py
    │   ├── prepare_data_and_model.py
    │   ├── define_predict_pack.py
    │   ├── optimization.py
    │   ├── define_paradigm.py
    │   ├── batching.py
    │   └── bert.py
    ├── data
    │   ├── input
    │   │   └── input.md
    │   ├── output
    │   │   └── output.md
    │   ├── inference_models
    │   │   └── inference_models.md
    │   ├── saved_models
    │   │   └── saved_models.md
    │   ├── pretrain_model
    │   │   └── pretrain_model.md
    │   └── config
    │   │   └── dgu.yaml
    ├── images
    │   └── dgu.png
    ├── eval.py
    ├── main.py
    ├── dgu_net.py
    ├── .run_ce.sh
    ├── _ce.py
    ├── inference_model.py
    ├── run.sh
    └── predict.py
├── .DS_Store
├── other
    └── ld.jpg
└── README.md


/ADE/ade/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/DAM/bin/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/DAM/models/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/DAM/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/DGU/dgu/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ADE/ade/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/DGU/dgu/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/DAM/log/ReadMe.txt:
--------------------------------------------------------------------------------
1 | store logs here.


--------------------------------------------------------------------------------
/DAM/output/ReadMe.txt:
--------------------------------------------------------------------------------
1 | store models here.


--------------------------------------------------------------------------------
/ADE/data/input/input.md:
--------------------------------------------------------------------------------
1 | training data directory
2 | 


--------------------------------------------------------------------------------
/DGU/data/input/input.md:
--------------------------------------------------------------------------------
1 | input train and test data directory
2 | 


--------------------------------------------------------------------------------
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/baidu/Dialogue/HEAD/.DS_Store


--------------------------------------------------------------------------------
/ADE/data/output/output.md:
--------------------------------------------------------------------------------
1 | save predict results output directory
2 | 


--------------------------------------------------------------------------------
/DGU/data/output/output.md:
--------------------------------------------------------------------------------
1 | save predict results output directory
2 | 


--------------------------------------------------------------------------------
/other/ld.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/baidu/Dialogue/HEAD/other/ld.jpg


--------------------------------------------------------------------------------
/ADE/data/inference_models/inference_models.md:
--------------------------------------------------------------------------------
1 | save inference model directory
2 | 


--------------------------------------------------------------------------------
/DAM/run.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 | CUDA_VISIBLE_DEVICES=0 python main.py
3 | 
4 | 
5 | 


--------------------------------------------------------------------------------
/DGU/data/inference_models/inference_models.md:
--------------------------------------------------------------------------------
1 | save inference model directory
2 | 


--------------------------------------------------------------------------------
/DGU/images/dgu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/baidu/Dialogue/HEAD/DGU/images/dgu.png


--------------------------------------------------------------------------------
/DAM/appendix/Figure1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/baidu/Dialogue/HEAD/DAM/appendix/Figure1.png


--------------------------------------------------------------------------------
/DAM/appendix/Figure2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/baidu/Dialogue/HEAD/DAM/appendix/Figure2.png


--------------------------------------------------------------------------------
/ADE/data/saved_models/saved_models.md:
--------------------------------------------------------------------------------
1 | directory for models fine-tuned by users and for the trained models we provide
2 | 


--------------------------------------------------------------------------------
/DGU/data/saved_models/saved_models.md:
--------------------------------------------------------------------------------
1 | directory for models fine-tuned by users and for the trained models we provide
2 | 


--------------------------------------------------------------------------------
/ADE/data/pretrain_model/pretrain_model.md:
--------------------------------------------------------------------------------
1 | pretrain model directory: model for network initialization
2 | 


--------------------------------------------------------------------------------
/DGU/data/pretrain_model/pretrain_model.md:
--------------------------------------------------------------------------------
1 | pretrain model directory: in this module, we use bert as pretrain model
2 | 


--------------------------------------------------------------------------------
/DAM/data/ReadMe.txt:
--------------------------------------------------------------------------------
1 | please download data from:
2 | 
3 | 	https://pan.baidu.com/s/1hakfuuwdS8xl7NyxlWzRiQ
4 | 
5 | and unzip it in this folder.
6 | 


--------------------------------------------------------------------------------
/ADE/data/config/ade.yaml:
--------------------------------------------------------------------------------
 1 | loss_type: "CLS"
 2 | training_file: ""
 3 | val_file: ""
 4 | predict_file: ""
 5 | print_steps: 10
 6 | save_steps: 10
 7 | num_scan_data: ""
 8 | word_emb_init: ""
 9 | init_model: ""
10 | use_cuda: True
11 | batch_size: 256
12 | hidden_size: 256
13 | emb_size: 256
14 | vocab_size: 484016
15 | sample_pro: 1.0
16 | output_prediction_file: ""
17 | init_from_checkpoint: ""
18 | init_from_params: ""
19 | init_from_pretrain_model: ""
20 | inference_model_dir: ""
21 | save_model_path: ""
22 | save_checkpoint: ""
23 | save_param: ""
24 | evaluation_file: ""
25 | vocab_path: ""
26 | max_seq_len: 128
27 | random_seed: 110
28 | do_save_inference_model: False
29 | enable_ce: ""
30 | 


--------------------------------------------------------------------------------
/DGU/data/config/dgu.yaml:
--------------------------------------------------------------------------------
 1 | task_name: ""
 2 | data_dir: ""
 3 | bert_config_path: ""
 4 | init_from_checkpoint: ""
 5 | init_from_params: ""
 6 | init_from_pretrain_model: ""
 7 | inference_model_dir: ""
 8 | save_model_path: ""
 9 | save_checkpoint: ""
10 | save_param: ""
11 | lr_scheduler: "linear_warmup_decay"
12 | weight_decay: 0.01
13 | warmup_proportion: 0.1
14 | save_steps: 1000
15 | use_fp16: False
16 | loss_scaling: 1.0
17 | print_steps: 20
18 | evaluation_file: ""
19 | output_prediction_file: ""
20 | vocab_path: ""
21 | max_seq_len: 128
22 | batch_size: 2
23 | verbose: False
24 | do_lower_case: False
25 | random_seed: 0
26 | use_cuda: True
27 | in_tokens: False
28 | do_save_inference_model: False
29 | enable_ce: ""
30 | 


--------------------------------------------------------------------------------
/DGU/dgu/scripts/README.md:
--------------------------------------------------------------------------------
 1 | scripts：运行数据处理脚本目录, 将官方公开数据集转换成模型所需训练数据格式
 2 | 运行命令：
 3 |   python run_build_data.py [udc|swda|mrda|atis|dstc2]
 4 | 
 5 | 1)、生成MATCHING任务所需要的训练集、开发集、测试集时:
 6 | python run_build_data.py udc
 7 | 生成数据在dialogue_general_understanding/data/input/data/udc
 8 | 
 9 | 2)、生成DA任务所需要的训练集、开发集、测试集时: 
10 |   python run_build_data.py swda
11 |   python run_build_data.py mrda
12 |   生成数据分别在dialogue_general_understanding/data/input/data/swda和dialogue_general_understanding/data/input/data/mrda
13 | 
14 | 3)、生成DST任务所需的训练集、开发集、测试集时:
15 |   python run_build_data.py dstc2
16 |   生成数据在dialogue_general_understanding/data/input/data/dstc2
17 | 
18 | 4)、生成意图解析, 槽位识别任务所需训练集、开发集、测试集时:
19 |   python run_build_data.py atis
20 |   生成槽位识别数据在dialogue_general_understanding/data/input/data/atis/atis_slot
21 |   生成意图识别数据在dialogue_general_understanding/data/input/data/atis/atis_intent
22 | 
23 | 
24 | 
25 | 


--------------------------------------------------------------------------------
/DGU/dgu/utils/py23.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | #   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | import sys
17 | 
# Whitespace tokens whose type matches the interpreter's text type:
# ``unicode`` literals on Python 2, plain ``str`` on Python 3.
# The major version is read from ``sys.version_info`` (a structured tuple)
# rather than by slicing the free-form ``sys.version`` banner string.
if sys.version_info[0] == 2:
    rt_tok = u'\n'
    tab_tok = u'\t'
    space_tok = u' '
else:
    rt_tok = '\n'
    tab_tok = '\t'
    space_tok = ' '
26 | 


--------------------------------------------------------------------------------
/DAM/utils/evaluation.py:
--------------------------------------------------------------------------------
 1 | import sys;
 2 | 
 3 | def get_p_at_n_in_m(data, n, m, ind):
 4 | 	pos_score = data[ind][0];
 5 | 	curr = data[ind:ind+m];
 6 | 	curr = sorted(curr, key = lambda x:x[0], reverse=True)
 7 | 
 8 | 	if curr[n-1][0] <= pos_score:
 9 | 		return 1;
10 | 	return 0;
11 | 
12 | def evaluate(file_path):
13 | 	data = []
14 | 	with open(file_path, 'r') as file:
15 | 		for line in file:
16 | 			line = line.strip();
17 | 			tokens = line.split("\t")
18 | 		
19 | 			if len(tokens) != 2:
20 | 				continue
21 | 		
22 | 			data.append((float(tokens[0]), int(tokens[1])));
23 | 		
24 | 	#assert len(data) % 10 == 0
25 | 	
26 | 	p_at_1_in_2 = 0.0
27 | 	p_at_1_in_10 = 0.0
28 | 	p_at_2_in_10 = 0.0
29 | 	p_at_5_in_10 = 0.0
30 | 
31 |         length = int(len(data)/10)
32 | 
33 | 	for i in xrange(0, length):
34 | 		ind = i * 10
35 | 		assert data[ind][1] == 1
36 | 	
37 | 		p_at_1_in_2 += get_p_at_n_in_m(data, 1, 2, ind)
38 | 		p_at_1_in_10 += get_p_at_n_in_m(data, 1, 10, ind)
39 | 		p_at_2_in_10 += get_p_at_n_in_m(data, 2, 10, ind)
40 | 		p_at_5_in_10 += get_p_at_n_in_m(data, 5, 10, ind)
41 | 
42 | 	return (p_at_1_in_2/length, p_at_1_in_10/length, p_at_2_in_10/length, p_at_5_in_10/length)
43 | 	
44 | 
45 | 


--------------------------------------------------------------------------------
/ADE/ade/utils/input_field.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from __future__ import print_function
16 | from __future__ import division
17 | from __future__ import print_function
18 | 
19 | import os
20 | import six
21 | import ast
22 | import copy
23 | 
24 | import numpy as np
25 | import paddle.fluid as fluid
26 | 
27 | 
class InputField(object):
    """Expose the positional entries of a model input list as named attributes."""

    def __init__(self, input_field):
        """Bind entries 0, 1 and 2 of ``input_field`` to named attributes."""
        slot_names = ("context_wordseq", "response_wordseq", "labels")
        for position, attr_name in enumerate(slot_names):
            setattr(self, attr_name, input_field[position])
34 | 


--------------------------------------------------------------------------------
/DGU/eval.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """evaluation"""
15 | 
16 | import os
17 | import sys
18 | 
19 | from dgu.evaluation import evaluate
20 | from dgu.utils.configure import PDConfig
21 | 
22 | 
def do_eval(args):
    """Evaluate the prediction file against the reference file.

    Reads ``task_name`` (lower-cased), ``evaluation_file`` and
    ``output_prediction_file`` from ``args`` and forwards them to
    ``evaluate`` as (task, predictions, reference). Returns None.
    """
    task = args.task_name.lower()
    reference_path = args.evaluation_file
    prediction_path = args.output_prediction_file

    evaluate(task, prediction_path, reference_path)
30 | 
31 | 
if __name__ == "__main__": 
    
    # Script entry point: create the config object from the DGU YAML file,
    # call build() on it, then run the evaluation step with it.
    args = PDConfig(yaml_file="./data/config/dgu.yaml")
    args.build()

    do_eval(args)
38 | 


--------------------------------------------------------------------------------
/DGU/dgu/scripts/conf/mrda.conf:
--------------------------------------------------------------------------------
 1 | train	Bdb001
 2 | train	Bed002
 3 | train	Bed004
 4 | train	Bed005
 5 | train	Bed008
 6 | train	Bed009
 7 | train	Bed011
 8 | train	Bed013
 9 | train	Bed014
10 | train	Bed015
11 | train	Bed017
12 | train	Bmr002
13 | train	Bmr003
14 | train	Bmr006
15 | train	Bmr007
16 | train	Bmr008
17 | train	Bmr009
18 | train	Bmr011
19 | train	Bmr012
20 | train	Bmr015
21 | train	Bmr016
22 | train	Bmr020
23 | train	Bmr021
24 | train	Bmr023
25 | train	Bmr025
26 | train	Bmr026
27 | train	Bmr027
28 | train	Bmr029
29 | train	Bmr031
30 | train	Bns001
31 | train	Bns002
32 | train	Bns003
33 | train	Bro003
34 | train	Bro005
35 | train	Bro007
36 | train	Bro010
37 | train	Bro012
38 | train	Bro013
39 | train	Bro015
40 | train	Bro016
41 | train	Bro017
42 | train	Bro019
43 | train	Bro022
44 | train	Bro023
45 | train	Bro025
46 | train	Bro026
47 | train	Bro028
48 | train	Bsr001
49 | train	Btr001
50 | train	Btr002
51 | train	Buw001
52 | dev	Bed003
53 | dev	Bed010
54 | dev	Bmr005
55 | dev	Bmr014
56 | dev	Bmr019
57 | dev	Bmr024
58 | dev	Bmr030
59 | dev	Bro004
60 | dev	Bro011
61 | dev	Bro018
62 | dev	Bro024
63 | test	Bed006
64 | test	Bed012
65 | test	Bed016
66 | test	Bmr001
67 | test	Bmr010
68 | test	Bmr022
69 | test	Bmr028
70 | test	Bro008
71 | test	Bro014
72 | test	Bro021
73 | test	Bro027
74 | 


--------------------------------------------------------------------------------
/DGU/dgu/utils/input_field.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from __future__ import print_function
16 | from __future__ import division
17 | from __future__ import print_function
18 | 
19 | import os
20 | import six
21 | import ast
22 | import copy
23 | 
24 | import numpy as np
25 | import paddle.fluid as fluid
26 | 
27 | 
class InputField(object):
    """Expose the positional entries of a model input list as named attributes."""

    def __init__(self, input_field):
        """Bind entries 0 through 4 of ``input_field`` to named attributes."""
        slot_names = ("src_ids", "pos_ids", "sent_ids", "input_mask", "labels")
        for position, attr_name in enumerate(slot_names):
            setattr(self, attr_name, input_field[position])
36 | 


--------------------------------------------------------------------------------
/DAM/main.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import os
 3 | import time
 4 | 
 5 | import cPickle as pickle
 6 | import tensorflow as tf
 7 | import numpy as np
 8 | 
 9 | import utils.reader as reader
10 | import models.net as net
11 | import utils.evaluation as eva
12 | #for douban
13 | #import utils.douban_evaluation as eva
14 | 
15 | import bin.train_and_evaluate as train
16 | import bin.test_and_evaluate as test
17 | 
18 | # configure
19 | 
# Run configuration. This single dict is passed unchanged to both net.Net
# and train.train below (and to test.test when that call is enabled).
# NOTE(review): the meaning of individual keys is defined by those modules,
# which are not visible from this file.
conf = {
    "data_path": "./data/ubuntu/data.pkl",
    "save_path": "./output/ubuntu/temp/",
    "word_emb_init": "./data/word_embedding.pkl",
    "init_model": None, #should be set for test

    "rand_seed": None, 

    "drop_dense": None,
    "drop_attention": None,

    "is_mask": True,
    "is_layer_norm": True,
    "is_positional": False,  

    "stack_num": 5,  
    "attention_type": "dot",

    "learning_rate": 1e-3,
    "vocab_size": 434512,
    "emb_size": 200,
    "batch_size": 256, #200 for test

    "max_turn_num": 9,  
    "max_turn_len": 50, 

    "max_to_keep": 1,
    "num_scan_data": 2,
    "_EOS_": 28270, #1 for douban data
    "final_n_class": 1,
}


# Build the network from the configuration and launch training.
# Both statements execute at import time; there is no __main__ guard.
model = net.Net(conf)
train.train(conf, model)

#test and evaluation, init_model in conf should be set
#test.test(conf, model)
58 | 
59 | 


--------------------------------------------------------------------------------
/DGU/main.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | import sys
17 | import numpy as np
18 | 
19 | import paddle
20 | import paddle.fluid as fluid
21 | 
22 | from eval import do_eval
23 | from train import do_train
24 | from predict import do_predict
25 | from inference_model import do_save_inference_model
26 | 
27 | from dgu.utils.configure import PDConfig
28 | 
29 | 
if __name__ == "__main__":

    # Create the config object from the DGU YAML file, build it and print it.
    args = PDConfig(yaml_file="./data/config/dgu.yaml")
    args.build()
    args.Print()

    # Stages run in this fixed order; each one is gated by the config flag of
    # the same name and receives the full config object.
    stage_table = (
        ("do_train", do_train),
        ("do_predict", do_predict),
        ("do_eval", do_eval),
        ("do_save_inference_model", do_save_inference_model),
    )
    for flag_name, run_stage in stage_table:
        if getattr(args, flag_name):
            run_stage(args)
47 | 
48 | # vim: set ts=4 sw=4 sts=4 tw=100:
49 | 


--------------------------------------------------------------------------------
/ADE/ade/utils/model_check.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import sys
16 | import paddle
17 | import paddle.fluid as fluid
18 | 
19 | 
def check_cuda(use_cuda, err=(
        "\nYou can not set use_cuda = True in the model because you are using paddlepaddle-cpu.\n"
        "     Please: 1. Install paddlepaddle-gpu to run your models on GPU or 2. Set use_cuda = False to run models on CPU.\n")):
    """Exit with status 1 when GPU use is requested on a build without CUDA.

    Args:
        use_cuda: the check only applies when this compares equal to True.
        err: message printed before exiting.

    Any ``Exception`` raised while checking or printing is swallowed, so the
    check is best effort. ``sys.exit`` raises ``SystemExit``, which is not an
    ``Exception`` subclass and therefore still terminates the process.
    """
    try:
        if use_cuda == True:
            if fluid.is_compiled_with_cuda() == False:
                print(err)
                sys.exit(1)
    except Exception:
        # Deliberately ignored: a failing probe must not stop the caller.
        pass
30 | 
31 | 
if __name__ == "__main__":

    # Manual smoke test: default message with GPU requested, GPU not
    # requested, then a custom message with GPU requested.
    for call_args in ((True,), (False,), (True, "This is only for testing.")):
        check_cuda(*call_args)
39 | 


--------------------------------------------------------------------------------
/DGU/dgu/utils/model_check.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import sys
16 | import paddle
17 | import paddle.fluid as fluid
18 | 
19 | 
def check_cuda(use_cuda, err=(
        "\nYou can not set use_cuda = True in the model because you are using paddlepaddle-cpu.\n"
        "     Please: 1. Install paddlepaddle-gpu to run your models on GPU or 2. Set use_cuda = False to run models on CPU.\n")):
    """Exit with status 1 when GPU use is requested on a build without CUDA.

    Args:
        use_cuda: the check only applies when this compares equal to True.
        err: message printed before exiting.

    Any ``Exception`` raised while checking or printing is swallowed, so the
    check is best effort. ``sys.exit`` raises ``SystemExit``, which is not an
    ``Exception`` subclass and therefore still terminates the process.
    """
    try:
        if use_cuda == True:
            if fluid.is_compiled_with_cuda() == False:
                print(err)
                sys.exit(1)
    except Exception:
        # Deliberately ignored: a failing probe must not stop the caller.
        pass
30 | 
31 | 
if __name__ == "__main__":

    # Manual smoke test: default message with GPU requested, GPU not
    # requested, then a custom message with GPU requested.
    for call_args in ((True,), (False,), (True, "This is only for testing.")):
        check_cuda(*call_args)
39 | 


--------------------------------------------------------------------------------
/ADE/main.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.                                                                                                      
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | import sys
17 | import numpy as np
18 | 
19 | import paddle
20 | import paddle.fluid as fluid
21 | 
22 | from eval import do_eval
23 | from train import do_train
24 | from predict import do_predict
25 | from inference_model import do_save_inference_model
26 | 
27 | from ade.utils.configure import PDConfig
28 | 
29 | 
if __name__ == "__main__":

    # Create the config object from the ADE YAML file, build it and print it.
    args = PDConfig(yaml_file="./data/config/ade.yaml")
    args.build()
    args.Print()

    # Stages run in this fixed order; each one is gated by the config flag of
    # the same name and receives the full config object.
    stage_table = (
        ("do_train", do_train),
        ("do_predict", do_predict),
        ("do_eval", do_eval),
        ("do_save_inference_model", do_save_inference_model),
    )
    for flag_name, run_stage in stage_table:
        if getattr(args, flag_name):
            run_stage(args)
47 | 
48 | # vim: set ts=4 sw=4 sts=4 tw=100:
49 | 


--------------------------------------------------------------------------------
/ADE/.run_ce.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | export FLAGS_sync_nccl_allreduce=0
 4 | export FLAGS_eager_delete_tensor_gb=1.0
 5 | 
 6 | export CUDA_VISIBLE_DEVICES=0
 7 | 
 8 | pretrain_model_path="data/saved_models/matching_pretrained"
 9 | if [ ! -d ${pretrain_model_path} ]
10 | then
11 |      mkdir ${pretrain_model_path}
12 | fi
13 | 
14 | python -u main.py \
15 |       --do_train=true \
16 |       --use_cuda=true \
17 |       --loss_type="CLS" \
18 |       --max_seq_len=50 \
19 |       --save_model_path="data/saved_models/matching_pretrained" \
20 |       --save_param="params" \
21 |       --training_file="data/input/data/unlabel_data/train.ids" \
22 |       --epoch=3 \
23 |       --print_step=1 \
24 |       --save_step=400 \
25 |       --batch_size=256 \
26 |       --hidden_size=256 \
27 |       --emb_size=256 \
28 |       --vocab_size=484016 \
29 |       --learning_rate=0.001 \
30 |       --sample_pro=0.1 \
31 |       --enable_ce="store_true" | python _ce.py
32 | 
33 | 
34 | export CUDA_VISIBLE_DEVICES=0,1,2,3
35 | 
36 | python -u main.py \
37 |       --do_train=true \
38 |       --use_cuda=true \
39 |       --loss_type="CLS" \
40 |       --max_seq_len=50 \
41 |       --save_model_path="data/saved_models/matching_pretrained" \
42 |       --save_param="params" \
43 |       --training_file="data/input/data/unlabel_data/train.ids" \
44 |       --epoch=3 \
45 |       --print_step=1 \
46 |       --save_step=400 \
47 |       --batch_size=256 \
48 |       --hidden_size=256 \
49 |       --emb_size=256 \
50 |       --vocab_size=484016 \
51 |       --learning_rate=0.001 \
52 |       --sample_pro=0.1 \
53 |       --enable_ce="store_true" | python _ce.py
54 | 
55 | 


--------------------------------------------------------------------------------
/DGU/dgu/scripts/run_build_data.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import shutil
15 | import sys
16 | import os
17 | 
18 | from build_atis_dataset import ATIS
19 | from build_dstc2_dataset import DSTC2
20 | from build_mrda_dataset import MRDA
21 | from build_swda_dataset import SWDA
22 | 
23 | 
if __name__ == "__main__":
    # Usage: python run_build_data.py [udc|swda|mrda|atis|dstc2]
    # A missing argument is reported like an unknown task name instead of
    # surfacing as an IndexError traceback.
    if len(sys.argv) < 2:
        print("task name error: we support [swda|mrda|atis|dstc2|udc]")
        sys.exit(1)

    task_name = sys.argv[1].lower()

    if task_name not in ['swda', 'mrda', 'atis', 'dstc2', 'udc']:
        print("task name error: we support [swda|mrda|atis|dstc2|udc]")
        sys.exit(1)

    if task_name == 'swda':
        swda_inst = SWDA()
        swda_inst.main()
    elif task_name == 'mrda':
        mrda_inst = MRDA()
        mrda_inst.main()
    elif task_name == 'atis':
        atis_inst = ATIS()
        atis_inst.main()
        # For both ATIS tasks the dev split is a copy of the test split.
        shutil.copyfile("../../data/input/data/atis/atis_slot/test.txt", "../../data/input/data/atis/atis_slot/dev.txt")
        shutil.copyfile("../../data/input/data/atis/atis_intent/test.txt", "../../data/input/data/atis/atis_intent/dev.txt")
    elif task_name == 'dstc2':
        dstc_inst = DSTC2()
        dstc_inst.main()
    else:
        # 'udc' is accepted as a task name but no builder is invoked for it
        # here; the script exits successfully without producing data.
        sys.exit(0)
48 | 
49 | 


--------------------------------------------------------------------------------
/DGU/dgu_net.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Create model for dialogue task."""
15 | 
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 | 
20 | import paddle.fluid as fluid
21 | 
22 | from dgu.bert import BertModel
23 | from dgu.utils.configure import JsonConfig
24 | 
25 | 
def create_net(
        is_training,
        model_input,
        num_labels,
        paradigm_inst,
        args):
    """Build the BERT encoder and pass it to the task paradigm.

    Args:
        is_training: forwarded to the paradigm under the 'is_training' key.
        model_input: object exposing src_ids, pos_ids, sent_ids, input_mask
            and labels attributes.
        num_labels: forwarded to the paradigm under the 'num_labels' key.
        paradigm_inst: object whose paradigm(bert, params) method builds the
            task-specific part of the network.
        args: configuration object; bert_config_path must be a str.

    Returns:
        Whatever paradigm_inst.paradigm returns.
    """
    # Everything the paradigm needs besides the encoder itself.
    params = {
        'num_labels': num_labels,
        'src_ids': model_input.src_ids,
        'pos_ids': model_input.pos_ids,
        'sent_ids': model_input.sent_ids,
        'input_mask': model_input.input_mask,
        'labels': model_input.labels,
        'is_training': is_training,
    }

    assert isinstance(args.bert_config_path, str)

    encoder_config = JsonConfig(args.bert_config_path)
    encoder = BertModel(
        src_ids=params['src_ids'],
        position_ids=params['pos_ids'],
        sentence_ids=params['sent_ids'],
        input_mask=params['input_mask'],
        config=encoder_config,
        use_fp16=False)

    return paradigm_inst.paradigm(encoder, params)
61 | 
62 | 


--------------------------------------------------------------------------------
/DGU/dgu/scripts/commonlib.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """common function"""
16 | import sys
17 | import io
18 | import os
19 | 
20 | 
def get_file_list(dir_name):
    """
    Recursively collect the files found under dir_name.

    Returns a pair (names, paths): the bare file names and, in the same
    order, each file's path joined with the directory it was found in.
    """
    names = []
    paths = []
    for root, _subdirs, filenames in os.walk(dir_name):
        names.extend(filenames)
        paths.extend(os.path.join(root, name) for name in filenames)
    return names, paths
32 | 
33 | 
def get_dir_list(dir_name):
    """
    List the immediate sub-directories of dir_name.

    Returns the entries of dir_name that are directories, each joined with
    dir_name, in os.listdir order.
    """
    candidates = (os.path.join(dir_name, entry) for entry in os.listdir(dir_name))
    return [candidate for candidate in candidates if os.path.isdir(candidate)]
46 | 
47 | 
def load_dict(conf):
    """
    load swda dataset config

    Reads a UTF-8 file of tab-separated "key<TAB>value" lines and groups the
    values by key, keeping file order.

    Args:
        conf: path of the config file.

    Returns:
        dict mapping each key to the list of values seen for it.

    Raises:
        IndexError: if a non-blank line has no second tab-separated field.
    """
    conf_dict = dict()
    # The context manager closes the file once it has been read.
    with io.open(conf, 'r', encoding="utf8") as fr:
        for line in fr:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no entry.
                continue
            elems = line.split('\t')
            conf_dict.setdefault(elems[0], []).append(elems[1])
    return conf_dict
61 | 
62 | 
def load_voc(conf):
    """
    load map dict

    Reads a UTF-8 file of tab-separated "key<TAB>value" lines into a dict.
    When a key appears more than once, the last value wins.

    Args:
        conf: path of the mapping file.

    Returns:
        dict mapping each key to its value (both str).

    Raises:
        IndexError: if a non-blank line has no second tab-separated field.
    """
    map_dict = {}
    # The context manager closes the file once it has been read.
    with io.open(conf, 'r', encoding="utf8") as fr:
        for line in fr:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no entry.
                continue
            elems = line.split('\t')
            map_dict[elems[0]] = elems[1]
    return map_dict
74 | 
75 | 
76 | 
77 | 


--------------------------------------------------------------------------------
/ADE/ade/evaluate.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.                                                                                                      
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Evaluation for auto dialogue evaluation"""
15 | 
16 | import sys
17 | import numpy as np
18 | import pandas as pd
19 | 
20 | 
def get_p_at_n_in_m(data, n, m, ind):
    """
    Check whether the candidate at position ind ranks in the top n of its group.

    The group is data[ind:ind + m]; each item is indexed as item[0] for its
    score. Returns 1 when the n-th highest score in the group does not
    exceed the score of data[ind], otherwise 0.
    """
    target_score = data[ind][0]
    ranked = sorted(data[ind:ind + m], key=lambda item: item[0], reverse=True)
    return 1 if ranked[n - 1][0] <= target_score else 0
32 | 
33 | 
def evaluate_Recall(data):
    """
    Compute recall-style metrics over consecutive groups of 10 candidates.

    data is a sequence of (score, label) items; the first item of every
    group of 10 must carry label 1 (checked with an assert). The number of
    complete groups is printed. Returns a dict with the keys '1_in_2',
    '1_in_10', '2_in_10' and '5_in_10', each the fraction of groups for
    which get_p_at_n_in_m reported a hit.
    """
    length = len(data) // 10
    print('length=%s' % length)

    # (result key, n, m) for every metric reported.
    settings = (
        ('1_in_2', 1, 2),
        ('1_in_10', 1, 10),
        ('2_in_10', 2, 10),
        ('5_in_10', 5, 10),
    )
    hits = {}
    for key, _, _ in settings:
        hits[key] = 0.0

    for start in range(0, length * 10, 10):
        assert data[start][1] == 1
        for key, n, m in settings:
            hits[key] += get_p_at_n_in_m(data, n, m, start)

    recall_dict = {}
    for key, _, _ in settings:
        recall_dict[key] = hits[key] / length
    return recall_dict
63 | 
64 | 
def evaluate_cor(pred, true):
    """
    Return the Spearman correlation between pred and true.

    Both sequences are placed in a two-column DataFrame and the
    ('pred', 'true') entry of its Spearman correlation matrix is returned.
    """
    frame = pd.DataFrame({'pred': pred, 'true': true})
    return frame.corr('spearman')['pred']['true']
72 | 


--------------------------------------------------------------------------------
/DGU/.run_ce.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | train_atis_slot(){ 
 4 |   if [ ! -d "./data/saved_models/atis_slot" ]; then
 5 |       mkdir "./data/saved_models/atis_slot"
 6 |   fi
 7 |   python -u train.py \
 8 |   --task_name=atis_slot \
 9 |   --use_cuda=true \
10 |   --do_train=true \
11 |   --in_tokens=false \
12 |   --epoch=2 \
13 |   --batch_size=32 \
14 |   --data_dir=./data/input/data/atis/atis_slot \
15 |   --bert_config_path=./data/pretrain_model/uncased_L-12_H-768_A-12/bert_config.json \
16 |   --vocab_path=./data/pretrain_model/uncased_L-12_H-768_A-12/vocab.txt \
17 |   --init_from_pretrain_model=./data/pretrain_model/uncased_L-12_H-768_A-12/params \
18 |   --save_model_path=./data/saved_models/atis_slot \
19 |   --save_param="params" \
20 |   --save_steps=100 \
21 |   --learning_rate=2e-5 \
22 |   --weight_decay=0.01 \
23 |   --max_seq_len=128 \
24 |   --print_steps=10 \
25 |   --use_fp16=false \
26 |   --enable_ce=store_true 
27 | }
28 | 
# Run train.py on the mrda task for 2 epochs with CE logging enabled.
# Checkpoints are written to ./data/saved_models/mrda.
train_mrda(){
  # -p creates missing parent directories too and is a no-op when the
  # directory already exists, so no existence test is needed.
  mkdir -p "./data/saved_models/mrda"
  python -u train.py \
  --task_name=mrda \
  --use_cuda=true \
  --do_train=true \
  --in_tokens=true \
  --epoch=2 \
  --batch_size=4096 \
  --data_dir=./data/input/data/mrda \
  --bert_config_path=./data/pretrain_model/uncased_L-12_H-768_A-12/bert_config.json \
  --vocab_path=./data/pretrain_model/uncased_L-12_H-768_A-12/vocab.txt \
  --init_from_pretrain_model=./data/pretrain_model/uncased_L-12_H-768_A-12/params \
  --save_model_path=./data/saved_models/mrda \
  --save_param="params" \
  --save_steps=500 \
  --learning_rate=2e-5 \
  --weight_decay=0.01 \
  --max_seq_len=128 \
  --print_steps=200 \
  --use_fp16=false \
  --enable_ce=store_true
}
54 | 
55 | # FIXME(zjl): this model would fail when GC is enabled,
56 | # but it seems that this error is from the model itself.
57 | # See issue here: https://github.com/PaddlePaddle/Paddle/issues/18994#event-2532039900       
58 | # To fix ce, disable gc in this model temporarily.  
59 | export FLAGS_eager_delete_tensor_gb=1
60 | 
61 | cudaid=${multi:=0,1,2,3}
62 | export CUDA_VISIBLE_DEVICES=$cudaid
63 | train_atis_slot | python _ce.py
64 | sleep 20
65 | 
66 | cudaid=${single:=0}
67 | export CUDA_VISIBLE_DEVICES=$cudaid
68 | train_atis_slot | python _ce.py
69 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 2 | 
 3 | # New record is achieved by ERNIE_English (2019/06/13)
 4 |   We got the new, best score of R_10 at 1 (**85.67%**) in the Ubuntu Corpus by incorporating ERNIE_English, an English pre-trained model from Baidu. 
 5 |   Please refer to DMTK (the Dialogue Modeling ToolKit) for more details.
 6 |   https://github.com/PaddlePaddle/models/tree/develop/PaddleNLP/PaddleDialogue
 7 | 
 8 | 
 9 | # Baidu NLP Dialogue team
10 |   The dialogue team, at Baidu NLP, is a group of engineers and researchers who truly trust in technology and work together to accelerate the development of open-domain dialogues.
11 |   
  Our battlefields include, but are not limited to, the fundamental technology of neural dialogue systems (seq2seq generation or context-response matching), knowledge-driven dialogue, and life-long learning dialogue systems with reinforcement learning; we also provide system-level solutions for open-domain chatbots.
13 |   
  Together we built the largest Chinese human-computer conversation system and support many businesses such as DuerOS, the largest chatbot in China. Our life-long learning system interacts with hundreds of millions of Chinese users every day and learns through imitation/user-feedback, distilling knowledge from conversations and learning to be smarter.
15 |   
16 |   We will release some source code of our previous work in the future, to make some small contribution to the whole community of human-computer conversation.
17 |  
18 | # Publication
19 | > + [Proactive Human-Machine Conversation with Explicit Conversation Goals](https://arxiv.org/abs/1906.05572). *ACL 2019, Full Paper, poster*
20 | > + [Multi-Turn Response Selection for Chatbots with Deep Attention Matching Network](http://aclweb.org/anthology/P18-1103). *ACL 2018, Full Paper, oral*
21 | > + [Multi-View Response Selection for Human-Computer Conversation](http://www.aclweb.org/anthology/D16-1036). *EMNLP 2016, Full Paper, poster*
22 | > + [Shall I be Your Chat Companion towards an Online Human-Computer Conversation System](http://research.baidu.com/Public/uploads/5acc2a6723f1d.pdf). *CIKM 2016, Full Paper, oral*
23 |  
24 | # Connected to our Chatbot Service
  Any Chinese developer can enable their own smart devices to talk with customers on open-domain topics by using our open chatbot service. Please find the usage manual at http://ai.baidu.com/forum/topic/show/497679 (in Chinese).
26 |  
27 | 


--------------------------------------------------------------------------------
/ADE/_ce.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.                                                                                                      
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """this file is only used for continuous evaluation test!"""
15 | 
16 | import os
17 | import sys
18 | sys.path.append(os.environ['ceroot'])
19 | from kpi import CostKpi
20 | from kpi import DurationKpi
21 | 
# KPIs recorded by the continuous-evaluation run: one loss and one duration
# metric each for the card1 and card4 configurations.
# NOTE(review): the two positional numbers are passed straight to the kpi
# module; presumably a tolerance and a skip count -- confirm against kpi.py.
train_loss_card1 = CostKpi('train_loss_card1', 0.03, 0, actived=True)
train_loss_card4 = CostKpi('train_loss_card4', 0.03, 0, actived=True)
train_duration_card1 = DurationKpi('train_duration_card1', 0.01, 0, actived=True)
train_duration_card4 = DurationKpi('train_duration_card4', 0.01, 0, actived=True)

# Every KPI that log_to_ce may look up by name.
tracking_kpis = [
        train_loss_card1,
        train_loss_card4,
        train_duration_card1,
        train_duration_card4,
]
33 | 
34 | 
def parse_log(log):
    '''
    Yield (kpi_name, kpi_value) pairs found in a training log.

    The log is split on newlines and each stripped line on tabs; the field
    list of every line is printed. Only lines with exactly three fields
    whose first field is "kpis" produce a result, i.e. lines shaped like

        kpis<TAB>kpi_name<TAB>kpi_value

    The value is converted with float().
    '''
    for raw_line in log.split('\n'):
        fields = raw_line.strip().split('\t')
        print(fields)
        if len(fields) != 3 or fields[0] != 'kpis':
            continue
        _, kpi_name, raw_value = fields
        yield kpi_name, float(raw_value)
58 | 
59 | 
def log_to_ce(log):
    """Record every KPI value found in log on its tracked KPI object.

    Each (name, value) pair produced by parse_log is printed, added to the
    KPI of that name from tracking_kpis and persisted. A name that is not
    tracked raises KeyError.
    """
    kpi_tracker = dict((kpi.name, kpi) for kpi in tracking_kpis)

    for kpi_name, kpi_value in parse_log(log):
        print(kpi_name, kpi_value)
        tracked = kpi_tracker[kpi_name]
        tracked.add_record(kpi_value)
        tracked.persist()
69 | 
70 | 
if __name__ == '__main__':
    # The training log arrives on stdin.
    log_to_ce(sys.stdin.read())
74 | 


--------------------------------------------------------------------------------
/ADE/ade/prepare_data_and_model.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | import tarfile
17 | import shutil
18 | import urllib
19 | import sys
20 | import io
21 | import os
22 | 
# Module used for urlretrieve: plain urllib by default, urllib.request when
# running on Python 3.
URLLIB=urllib
if sys.version_info >= (3, 0): 
    import urllib.request
    URLLIB=urllib.request

# Download URL of each archive, keyed by resource kind.
DATA_MODEL_PATH = {"DATA_PATH": "https://baidu-nlp.bj.bcebos.com/auto_dialogue_evaluation_dataset-1.0.0.tar.gz", 
                   "TRAINED_MODEL": "https://baidu-nlp.bj.bcebos.com/auto_dialogue_evaluation_models.2.0.0.tar.gz"} 

# Directory each archive is extracted into; keys match DATA_MODEL_PATH.
PATH_MAP = {'DATA_PATH': "./data/input", 
            'TRAINED_MODEL': './data/saved_models'}
33 | 
34 | 
def un_tar(tar_name, dir_name):
    """Extract the archive tar_name into the directory dir_name.

    Args:
        tar_name: path of the tar archive to open.
        dir_name: directory the members are extracted into.

    Returns:
        True when extraction succeeded; False when opening or extracting
        raised, in which case the exception is printed.
    """
    try:
        # The context manager closes the archive even when extraction fails.
        with tarfile.open(tar_name) as archive:
            # NOTE: extractall trusts the member paths stored in the archive;
            # only use it on archives from a trusted source.
            archive.extractall(path=dir_name)
        return True
    except Exception as e:
        print(e)
        return False
43 | 
44 | 
def download_model_and_data():
    """Download and unpack every archive listed in DATA_MODEL_PATH.

    Previously extracted directories are removed first. Each archive is
    downloaded into the current directory, extracted into the directory
    PATH_MAP gives for its key, and then deleted.

    Returns:
        True when every archive was extracted; False as soon as one
        extraction fails (that archive is left on disk).
    """
    print("Downloading ade data, pretrain model and trained models......")
    print("This process is quite long, please wait patiently............")

    stale_dirs = ['./data/input/data', './data/saved_models/trained_models']
    for stale_dir in stale_dirs:
        if os.path.exists(stale_dir):
            shutil.rmtree(stale_dir)

    for path_key, url in DATA_MODEL_PATH.items():
        archive_name = os.path.basename(url)
        URLLIB.urlretrieve(url, os.path.join("./", archive_name))
        if not un_tar(archive_name, PATH_MAP[path_key]):
            print("Tar %s error....." % path_key)
            return False
        os.remove(archive_name)
    return True
61 | 
62 | 
if __name__ == "__main__":
    # Exit with a non-zero status when a download or extraction step failed.
    # sys.exit is used because the bare exit() helper is installed by the
    # site module for interactive sessions.
    state = download_model_and_data()
    if not state:
        sys.exit(1)
    print("Downloading data and models success......")
68 | 


--------------------------------------------------------------------------------
/ADE/eval.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.                                                                                                      
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """evaluation metrics"""
16 | 
17 | import io
18 | import os
19 | import sys
20 | import numpy as np
21 | 
22 | import ade.evaluate as evaluate
23 | from ade.utils.configure import PDConfig
24 | 
25 | 
def do_eval(args):
    """evaluate metrics

    Reads the labels and the predicted scores, then prints the metrics that
    match args.loss_type.

    Args:
        args: configuration object providing
            evaluation_file: UTF-8 file with three tab-separated columns per
                line; the third column is the integer label.
            output_prediction_file: UTF-8 file with two tab-separated columns
                per line; the second is a bracketed, whitespace-separated
                list of floats.
            loss_type: 'CLS' prints the mean score and recall metrics, 'L2'
                prints the mean score and the correlation with the labels.

    Raises:
        AssertionError: if a line does not have the expected column count.
        ValueError: if args.loss_type is neither 'CLS' nor 'L2'.
    """
    labels = []
    # Context managers make sure both input files are closed after reading.
    with io.open(args.evaluation_file, 'r', encoding="utf8") as fr:
        for line in fr:
            tokens = line.strip().split('\t')
            assert len(tokens) == 3
            labels.append(int(tokens[2]))

    scores = []
    with io.open(args.output_prediction_file, 'r', encoding="utf8") as fr:
        for line in fr:
            tokens = line.strip().split('\t')
            assert len(tokens) == 2
            # "[0.1 0.9]" -> array([0.1, 0.9])
            score = np.array(tokens[1].strip("[]").split())
            scores.append(score.astype(np.float64))

    if args.loss_type == 'CLS':
        recall_dict = evaluate.evaluate_Recall(list(zip(scores, labels)))
        mean_score = sum(scores) / len(scores)
        print('mean score: %.6f' % mean_score)
        print('evaluation recall result:')
        print('1_in_2: %.6f\t1_in_10: %.6f\t2_in_10: %.6f\t5_in_10: %.6f' %
             (recall_dict['1_in_2'], recall_dict['1_in_10'],
             recall_dict['2_in_10'], recall_dict['5_in_10']))
    elif args.loss_type == 'L2':
        # Only the first value of every prediction is used for regression.
        scores = [x[0] for x in scores]
        mean_score = sum(scores) / len(scores)
        cor = evaluate.evaluate_cor(scores, labels)
        print('mean score: %.6f\nevaluation cor results:%.6f' %
            (mean_score, cor))
    else:
        raise ValueError(
            "unsupported loss_type: %r (expected 'CLS' or 'L2')" %
            (args.loss_type,))
62 |     
63 | 
if __name__ == "__main__":
    # Load the ADE configuration, then run the evaluation with it.
    config = PDConfig(yaml_file="./data/config/ade.yaml")
    config.build()
    do_eval(config)
69 | 


--------------------------------------------------------------------------------
/DGU/dgu/prepare_data_and_model.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | import tarfile
17 | import shutil
18 | import urllib
19 | import sys
20 | import io
21 | import os
22 | 
23 | 
# Module used for urlretrieve: plain urllib by default, urllib.request when
# running on Python 3.
URLLIB=urllib
if sys.version_info >= (3, 0): 
    import urllib.request
    URLLIB=urllib.request

# Download URL of each archive, keyed by resource kind.
DATA_MODEL_PATH = {"DATA_PATH": "https://baidu-nlp.bj.bcebos.com/dmtk_data_1.0.0.tar.gz", 
                   "PRETRAIN_MODEL": "https://bert-models.bj.bcebos.com/uncased_L-12_H-768_A-12.tar.gz", 
                   "TRAINED_MODEL": "https://baidu-nlp.bj.bcebos.com/dgu_models_2.0.0.tar.gz"} 

# Directory each archive is extracted into; keys match DATA_MODEL_PATH.
PATH_MAP = {'DATA_PATH': "./data/input", 
            'PRETRAIN_MODEL': './data/pretrain_model', 
            'TRAINED_MODEL': './data/saved_models'}
36 | 
37 | 
def un_tar(tar_name, dir_name):
    """Extract the archive tar_name into the directory dir_name.

    Args:
        tar_name: path of the tar archive to open.
        dir_name: directory the members are extracted into.

    Returns:
        True when extraction succeeded; False when opening or extracting
        raised, in which case the exception is printed.
    """
    try:
        # The context manager closes the archive even when extraction fails.
        with tarfile.open(tar_name) as archive:
            # NOTE: extractall trusts the member paths stored in the archive;
            # only use it on archives from a trusted source.
            archive.extractall(path=dir_name)
        return True
    except Exception as e:
        print(e)
        return False
46 | 
47 | 
def download_model_and_data():
    """Download and unpack every archive listed in DATA_MODEL_PATH.

    Previously extracted directories are removed first. Each archive is
    downloaded into the current directory, extracted into the directory
    PATH_MAP gives for its key, and then deleted.

    Returns:
        True when every archive was extracted; False as soon as one
        extraction fails (that archive is left on disk).
    """
    print("Downloading dgu data, pretrain model and trained models......")
    print("This process is quite long, please wait patiently............")

    stale_dirs = ['./data/input/data', './data/pretrain_model/uncased_L-12_H-768_A-12', './data/saved_models/trained_models']
    for stale_dir in stale_dirs:
        if os.path.exists(stale_dir):
            shutil.rmtree(stale_dir)

    for path_key, url in DATA_MODEL_PATH.items():
        archive_name = os.path.basename(url)
        URLLIB.urlretrieve(url, os.path.join("./", archive_name))
        if not un_tar(archive_name, PATH_MAP[path_key]):
            print("Tar %s error....." % path_key)
            return False
        os.remove(archive_name)
    return True
64 | 
65 | 
if __name__ == "__main__":
    # Exit with a non-zero status when a download or extraction step failed.
    # sys.exit is used because the bare exit() helper is installed by the
    # site module for interactive sessions.
    state = download_model_and_data()
    if not state:
        sys.exit(1)
    print("Downloading data and models success......")
71 | 


--------------------------------------------------------------------------------
/DGU/dgu/define_predict_pack.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """define prediction results"""
15 | 
16 | import re
17 | import sys
18 | import numpy as np
19 | 
20 | import paddle
21 | import paddle.fluid as fluid
22 | 
23 | 
class DefinePredict(object):
    """
    Turn raw model outputs into the prediction format of each task.

    task_map gives, for every supported task name, the name of the method
    that packages its predictions.
    """
    def __init__(self):
        """
        Build the task name -> method name table.
        """
        self.task_map = {
            'udc': 'get_matching_res',
            'swda': 'get_cls_res',
            'mrda': 'get_cls_res',
            'atis_intent': 'get_cls_res',
            'atis_slot': 'get_sequence_tagging',
            'dstc2': 'get_multi_cls_res',
            'dstc2_asr': 'get_multi_cls_res',
            'multi-woz': 'get_multi_cls_res',
        }

    def get_matching_res(self, probs, params=None):
        """
        Return the matching score, i.e. the second entry of probs.
        """
        return list(probs)[1]

    def get_cls_res(self, probs, params=None):
        """
        Return the index of the first maximum of probs.
        """
        candidates = list(probs)
        return candidates.index(max(candidates))

    def get_sequence_tagging(self, probs, params=None):
        """
        Reshape probs into rows of params entries and render each row as a
        space-separated string.
        """
        rows = np.array(probs).reshape(-1, params)
        return [" ".join(str(tag) for tag in list(row)) for row in rows]

    def get_multi_cls_res(self, probs, params=None):
        """
        Return the indices whose probability is at least 0.5 as a
        space-separated string; when none qualifies, return the index of the
        first maximum instead.
        """
        candidates = list(probs)
        active = [idx for idx, prob in enumerate(candidates) if prob >= 0.5]
        if active:
            return " ".join(str(idx) for idx in sorted(active))
        return str(candidates.index(max(candidates)))
82 | 
83 | 
84 | 
85 | 
86 | 


--------------------------------------------------------------------------------
/DGU/_ce.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """this file is only used for continuous evaluation test!"""
15 | 
16 | import os
17 | import sys
18 | sys.path.append(os.environ['ceroot'])
19 | from kpi import CostKpi
20 | from kpi import DurationKpi
21 | from kpi import AccKpi
22 | 
# KPIs recorded by the continuous-evaluation run of the atis_slot task: step
# duration, training loss and training accuracy for the card1 and card4
# configurations.
# NOTE(review): the two positional numbers are passed straight to the kpi
# module; presumably a tolerance and a skip count -- confirm against kpi.py.
# NOTE(review): the accuracy KPIs are built with CostKpi although AccKpi is
# imported and unused -- confirm this is intended.
each_step_duration_atis_slot_card1 = DurationKpi('each_step_duration_atis_slot_card1', 0.01, 0, actived=True)
train_loss_atis_slot_card1 = CostKpi('train_loss_atis_slot_card1', 0.08, 0, actived=True)
train_acc_atis_slot_card1 = CostKpi('train_acc_atis_slot_card1', 0.01, 0, actived=True)
each_step_duration_atis_slot_card4 = DurationKpi('each_step_duration_atis_slot_card4', 0.06, 0, actived=True)
train_loss_atis_slot_card4 = CostKpi('train_loss_atis_slot_card4', 0.03, 0, actived=True)
train_acc_atis_slot_card4 = CostKpi('train_acc_atis_slot_card4', 0.01, 0, actived=True)

# Every KPI that log_to_ce may look up by name.
tracking_kpis = [
        each_step_duration_atis_slot_card1,
        train_loss_atis_slot_card1,
        train_acc_atis_slot_card1,
        each_step_duration_atis_slot_card4,
        train_loss_atis_slot_card4,
        train_acc_atis_slot_card4,
]
38 | 
39 | 
def parse_log(log):
    '''
    Yield (kpi_name, kpi_value) pairs found in a training log.

    The log is split on newlines and each stripped line on tabs; the field
    list of every line is printed. Only lines with exactly three fields
    whose first field is "kpis" produce a result, i.e. lines shaped like

        kpis<TAB>kpi_name<TAB>kpi_value

    The value is converted with float().
    '''
    for raw_line in log.split('\n'):
        fields = raw_line.strip().split('\t')
        print(fields)
        if len(fields) != 3 or fields[0] != 'kpis':
            continue
        _, kpi_name, raw_value = fields
        yield kpi_name, float(raw_value)
63 | 
64 | 
def log_to_ce(log):
    """Record every KPI value found in log on its tracked KPI object.

    Each (name, value) pair produced by parse_log is printed, added to the
    KPI of that name from tracking_kpis and persisted. A name that is not
    tracked raises KeyError.
    """
    kpi_tracker = dict((kpi.name, kpi) for kpi in tracking_kpis)

    for kpi_name, kpi_value in parse_log(log):
        print(kpi_name, kpi_value)
        tracked = kpi_tracker[kpi_name]
        tracked.add_record(kpi_value)
        tracked.persist()
74 | 
75 | 
if __name__ == '__main__':
    # The training log arrives on stdin.
    log_to_ce(sys.stdin.read())
79 | 


--------------------------------------------------------------------------------
/DAM/utils/douban_evaluation.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import numpy as np
 3 | from sklearn.metrics import average_precision_score
 4 | 
 5 | def mean_average_precision(sort_data):
 6 |     #to do
 7 |     count_1 = 0
 8 |     sum_precision = 0
 9 |     for index in range(len(sort_data)):
10 |         if sort_data[index][1] == 1:
11 |             count_1 += 1
12 |             sum_precision += 1.0 * count_1 / (index+1)
13 |     return sum_precision / count_1
14 | 
def mean_reciprocal_rank(sort_data):
    """Return 1 / rank of the first item labelled 1 in the ranked list.

    Asserts that at least one item carries label 1.
    """
    ranked_labels = [item[1] for item in sort_data]
    assert 1 in ranked_labels
    first_hit_rank = ranked_labels.index(1) + 1
    return 1.0 / first_hit_rank
19 | 
def precision_at_position_1(sort_data):
    """Return 1 when the top-ranked item is labelled 1, otherwise 0."""
    return 1 if sort_data[0][1] == 1 else 0
25 | 
def recall_at_position_k_in_10(sort_data, k):
    """Return the share of items labelled 1 that appear in the top k."""
    ranked_labels = [item[1] for item in sort_data]
    hits_in_top_k = ranked_labels[:k].count(1)
    total_hits = ranked_labels.count(1)
    return 1.0 * hits_in_top_k / total_hits
30 | 
def evaluation_one_session(data):
    """Compute all ranking metrics for one session.

    data holds (score, label) items; they are ranked by descending score
    before the metrics are computed.

    Returns the tuple (MAP, MRR, P@1, R@1, R@2, R@5).
    """
    ranked = sorted(data, key=lambda item: item[0], reverse=True)
    m_a_p = mean_average_precision(ranked)
    m_r_r = mean_reciprocal_rank(ranked)
    p_1 = precision_at_position_1(ranked)
    r_1, r_2, r_5 = [recall_at_position_k_in_10(ranked, k) for k in (1, 2, 5)]
    return m_a_p, m_r_r, p_1, r_1, r_2, r_5
40 | 
def evaluate(file_path):
    """Average the per-session ranking metrics over a score file.

    Each line of ``file_path`` holds ``score<TAB>label``.  Every run of ten
    consecutive lines forms one session; a trailing group of fewer than ten
    lines is never evaluated.  Returns the tuple of session averages
    ``(MAP, MRR, P@1, R@1, R@2, R@5)``.  Raises ``ZeroDivisionError`` when
    the file contains no complete session.
    """
    totals = [0] * 6
    total_num = 0

    with open(file_path, 'r') as infile:
        for i, line in enumerate(infile):
            # A fresh session starts on every tenth line.
            if i % 10 == 0:
                data = []

            tokens = line.strip().split('\t')
            data.append((float(tokens[0]), int(tokens[1])))

            # The tenth line closes the session: score it and accumulate.
            if i % 10 == 9:
                total_num += 1
                for slot, value in enumerate(evaluation_one_session(data)):
                    totals[slot] += value

    return tuple(1.0 * total / total_num for total in totals)
77 | 
if __name__ == '__main__':
    # Print one averaged metric per line for the score file given as argv[1].
    for metric in evaluate(sys.argv[1]):
        print(metric)
82 | 
83 | 
84 | 
85 | 
86 |         
87 | 
88 | 
89 | 
90 | 


--------------------------------------------------------------------------------
/DAM/bin/test_and_evaluate.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import os
 3 | import time
 4 | 
 5 | import cPickle as pickle
 6 | import tensorflow as tf
 7 | import numpy as np
 8 | 
 9 | import utils.reader as reader
10 | import utils.evaluation as eva
11 | 
12 | 
def test(conf, _model):
    """Score the test split with a restored model and write evaluation results.

    Keys read from ``conf``:
        save_path: output location; created if missing.  File names are
            appended by plain string concatenation, so it presumably ends
            with a path separator.
        data_path: pickle file holding ``(train, val, test)`` data.
        init_model: checkpoint passed to ``_model.saver.restore``.
        batch_size: number of score rows written per batch.

    Writes ``score.test`` (one ``score<TAB>label`` line per example) and
    ``result.test`` (one metric per line, as returned by ``eva.evaluate``).
    """
    if not os.path.exists(conf['save_path']):
        os.makedirs(conf['save_path'])

    # load data
    print('starting loading data')
    print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # only point data_path at trusted data.
    with open(conf["data_path"], 'rb') as data_file:
        # Only the test split is used here; the handle is closed right away.
        _, _, test_data = pickle.load(data_file)
    print('finish loading data')

    test_batches = reader.build_batches(test_data, conf)

    print("finish building test batches")
    print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))

    # refine conf
    test_batch_num = len(test_batches["response"])

    print('configurations: %s' %conf)


    _graph = _model.build_graph()
    print('build graph sucess')
    print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))

    with tf.Session(graph=_graph) as sess:
        _model.saver.restore(sess, conf["init_model"])
        print("sucess init %s" %conf["init_model"])

        score_file_path = conf['save_path'] + 'score.test'

        print('starting test')
        print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
        # The context manager closes the score file even if a batch fails.
        with open(score_file_path, 'w') as score_file:
            for batch_index in xrange(test_batch_num):

                feed = {
                    _model.turns: test_batches["turns"][batch_index],
                    _model.tt_turns_len: test_batches["tt_turns_len"][batch_index],
                    _model.every_turn_len: test_batches["every_turn_len"][batch_index],
                    _model.response: test_batches["response"][batch_index],
                    _model.response_len: test_batches["response_len"][batch_index],
                    _model.label: test_batches["label"][batch_index]
                    }

                scores = sess.run(_model.logits, feed_dict = feed)

                for i in xrange(conf["batch_size"]):
                    score_file.write(
                        str(scores[i]) + '\t' +
                        str(test_batches["label"][batch_index][i]) + '\n')

        print('finish test')
        print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))


        #write evaluation result
        result = eva.evaluate(score_file_path)
        result_file_path = conf["save_path"] + "result.test"
        with open(result_file_path, 'w') as out_file:
            for p_at in result:
                out_file.write(str(p_at) + '\n')
        print('finish evaluation')
        print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
85 |         
86 | 
87 |                     
88 | 


--------------------------------------------------------------------------------
/DAM/README.md:
--------------------------------------------------------------------------------
 1 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 2 | 
 3 | # __New Record__
 4 | 
 5 | We got the new, best score of R_10 at 1 (85.67%) in the Ubuntu Corpus by incorporating ERNIE_English, an English pre-trained model from Baidu. Please refer to DMTK (the Dialogue Modeling ToolKit) for more details. https://github.com/PaddlePaddle/models/tree/develop/PaddleNLP/dialogue_model_toolkit
 6 | 
 7 | 
 8 | 
 9 | # __Deep Attention Matching Network__
10 | 
11 | This is the source code of the Deep Attention Matching network (DAM), which is proposed for multi-turn response selection in retrieval-based chatbots.
12 | 
13 | DAM is a neural matching network that is entirely based on the attention mechanism. The motivation of DAM is to capture the semantic dependencies among dialogue elements at different levels of granularity in a multi-turn conversation as matching evidence, in order to better match a response candidate with its multi-turn context. DAM will appear at ACL-2018; please find our paper at: http://acl2018.org/conference/accepted-papers/.
14 | 
15 | ## __Paddle Version__
16 | 
17 | DAM was originally implemented with TensorFlow; we highly recommend using the Paddle version, as Paddle supports parallel training on very large corpora.
18 | 
19 | You can find the paddle version at: https://github.com/PaddlePaddle/models/tree/develop/fluid .
20 | 
21 | ## __Network__
22 | 
23 | DAM is inspired by Transformer in Machine Translation (Vaswani et al., 2017), and we extend the key attention mechanism of Transformer in two perspectives and introduce those two kinds of attention in one uniform neural network.
24 | 
25 | - **self-attention** To gradually capture semantic representations in different granularities by stacking attention from word-level embeddings. Those multi-grained semantic representations would facilitate exploring segmental dependencies between context and response.
26 | 
27 | - **cross-attention** Attention across context and response can generally capture the relevance in dependency between segment pairs, which could provide complementary information to textual relevance for matching response with multi-turn context.
28 | 
29 | <div align=center>
30 | <img src="appendix/Figure1.png" width=800>
31 | </div>
32 | 
33 | ## __Results__
34 | 
35 | We test DAM on two large-scale multi-turn response selection tasks, i.e., the Ubuntu Corpus v1 and the Douban Conversation Corpus. The experimental results are below:
36 | 
37 | <img src="appendix/Figure2.png">
38 | 
39 | ## __Usage__
40 | 
41 | First, please download [data](https://pan.baidu.com/s/1hakfuuwdS8xl7NyxlWzRiQ "data") and unzip it:
42 | ```
43 | cd data
44 | unzip data.zip
45 | ```
46 | 
47 | If you want to use well-trained models directly, please download [models](https://pan.baidu.com/s/1pl4d63MBxihgrEWWfdAz0w "models") and unzip them:
48 | ```
49 | cd output
50 | unzip output.zip
51 | ```
52 | 
53 | Train and test the model by:
54 | ```
55 | sh run.sh
56 | ```
57 | 
58 | ## __Dependencies__
59 | 
60 | - Python >= 2.7.3
61 | - Tensorflow == 1.2.1
62 | 
63 | ## __Citation__
64 | 
65 | The following article describes DAM in detail. We recommend citing this article by default.
66 | 
67 | ```
68 | @inproceedings{ ,
69 |   title={Multi-Turn Response Selection for Chatbots with Deep Attention Matching Network},
70 |   author={Xiangyang Zhou, Lu Li, Daxiang Dong, Yi Liu, Ying Chen, Wayne Xin Zhao, Dianhai Yu and Hua Wu},
71 |   booktitle={Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
72 |   volume={1},
73 |   pages={  --  },
74 |   year={2018}
75 | }
76 | ```
77 | 
78 | 
79 | 


--------------------------------------------------------------------------------
/ADE/inference_model.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.                                                                                                      
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | """save inference model for auto dialogue evaluation"""
 15 | 
 16 | import os
 17 | import sys
 18 | import six
 19 | import numpy as np
 20 | import time
 21 | import paddle
 22 | import paddle.fluid as fluid
 23 | 
 24 | import ade.reader as reader
 25 | from ade_net import create_net
 26 | 
 27 | from ade.utils.configure import PDConfig
 28 | from ade.utils.input_field import InputField
 29 | from ade.utils.model_check import check_cuda
 30 | import ade.utils.save_load_io as save_load_io
 31 | 
 32 | 
 33 | def do_save_inference_model(args):
 34 | 
 35 |     test_prog = fluid.default_main_program()
 36 |     startup_prog = fluid.default_startup_program()
 37 | 
 38 |     with fluid.program_guard(test_prog, startup_prog):
 39 |         test_prog.random_seed = args.random_seed
 40 |         startup_prog.random_seed = args.random_seed
 41 | 
 42 |         with fluid.unique_name.guard():
 43 | 
 44 |             context_wordseq = fluid.data(
 45 |                     name='context_wordseq', shape=[-1, 1], dtype='int64', lod_level=1)
 46 |             response_wordseq = fluid.data(
 47 |                     name='response_wordseq', shape=[-1, 1], dtype='int64', lod_level=1)
 48 |             labels = fluid.data(
 49 |                     name='labels', shape=[-1, 1], dtype='int64')
 50 | 
 51 |             input_inst = [context_wordseq, response_wordseq, labels]
 52 |             input_field = InputField(input_inst)
 53 |             data_reader = fluid.io.PyReader(feed_list=input_inst, 
 54 |                         capacity=4, iterable=False)
 55 | 
 56 |             logits = create_net(
 57 |                     is_training=False,
 58 |                     model_input=input_field, 
 59 |                     args=args
 60 |                 )
 61 | 
 62 |     if args.use_cuda:
 63 |         place = fluid.CUDAPlace(0)
 64 |     else:
 65 |         place = fluid.CPUPlace()
 66 | 
 67 |     exe = fluid.Executor(place)
 68 |     exe.run(startup_prog)
 69 | 
 70 |     assert (args.init_from_params) or (args.init_from_pretrain_model)
 71 |     
 72 |     if args.init_from_params:
 73 |         save_load_io.init_from_params(args, exe, test_prog)
 74 |     elif args.init_from_pretrain_model:
 75 |         save_load_io.init_from_pretrain_model(args, exe, test_prog)
 76 | 
 77 |     # saving inference model
 78 |     fluid.io.save_inference_model(
 79 |             args.inference_model_dir,
 80 |             feeded_var_names=[
 81 |                 input_field.context_wordseq.name, 
 82 |                 input_field.response_wordseq.name,
 83 |             ],
 84 |             target_vars=[
 85 |                 logits,
 86 |             ],
 87 |             executor=exe,
 88 |             main_program=test_prog,
 89 |             model_filename="model.pdmodel",
 90 |             params_filename="params.pdparams")
 91 | 
 92 |     print("save inference model at %s" % (args.inference_model_dir))
 93 | 
 94 | 
 95 | if __name__ == "__main__":
 96 |     args = PDConfig(yaml_file="./data/config/ade.yaml")   
 97 |     args.build()
 98 | 
 99 |     check_cuda(args.use_cuda)
100 | 
101 |     do_save_inference_model(args)
102 | 


--------------------------------------------------------------------------------
/DGU/dgu/utils/fp16.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from __future__ import print_function
16 | import paddle
17 | import paddle.fluid as fluid
18 | 
19 | 
def cast_fp16_to_fp32(i, o, prog):
    """Append a ``cast`` op to ``prog``'s global block: ``i`` (FP16) -> ``o`` (FP32)."""
    var_type = fluid.core.VarDesc.VarType
    cast_attrs = {"in_dtype": var_type.FP16, "out_dtype": var_type.FP32}
    global_block = prog.global_block()
    global_block.append_op(
        type="cast", inputs={"X": i}, outputs={"Out": o}, attrs=cast_attrs)
29 | 
30 | 
def cast_fp32_to_fp16(i, o, prog):
    """Append a ``cast`` op to ``prog``'s global block: ``i`` (FP32) -> ``o`` (FP16)."""
    var_type = fluid.core.VarDesc.VarType
    cast_attrs = {"in_dtype": var_type.FP32, "out_dtype": var_type.FP16}
    global_block = prog.global_block()
    global_block.append_op(
        type="cast", inputs={"X": i}, outputs={"Out": o}, attrs=cast_attrs)
40 | 
41 | 
def copy_to_master_param(p, block):
    """Create the FP32 "master" parameter for ``p`` inside ``block``.

    The variable registered in ``block`` under ``p.name`` supplies shape,
    type, lod level and the base name; training attributes are taken from
    ``p``.  The new parameter is named ``<name>.master``.

    Raises:
        ValueError: if ``block`` has no variable named ``p.name``.
    """
    source_var = block.vars.get(p.name, None)
    if source_var is None:
        raise ValueError("no param name %s found!" % p.name)

    master_name = source_var.name + ".master"
    return fluid.framework.Parameter(
        block=block,
        shape=source_var.shape,
        dtype=fluid.core.VarDesc.VarType.FP32,
        type=source_var.type,
        lod_level=source_var.lod_level,
        stop_gradient=p.stop_gradient,
        trainable=p.trainable,
        optimize_attr=p.optimize_attr,
        regularizer=p.regularizer,
        gradient_clip_attr=p.gradient_clip_attr,
        error_clip=p.error_clip,
        name=master_name)
60 | 
61 | 
def create_master_params_grads(params_grads, main_prog, startup_prog,
                               loss_scaling):
    """Pair each gradient with the parameter the optimizer should update.

    For every ``(param, grad)`` an FP32 master parameter is created in
    ``main_prog`` and initialised in ``startup_prog`` by casting the original
    parameter.  Gradients whose name contains ``"layer_norm"`` stay paired
    with the original parameter and are not cast; all others are cast to
    float32 and paired with the master parameter.  When ``loss_scaling > 1``
    the gradient is divided by it.  The ops are appended with the program's
    current role set to Backward; the previous role is restored afterwards.

    Returns:
        list of ``[param, grad]`` pairs in the order of ``params_grads``.
    """
    pairs = []
    saved_role = main_prog._current_role
    main_prog._current_role = fluid.core.op_proto_and_checker_maker.OpRole.Backward

    for param, grad in params_grads:
        # create master parameters
        master = copy_to_master_param(param, main_prog.global_block())
        startup_master = startup_prog.global_block()._clone_variable(master)
        startup_param = startup_prog.global_block().var(param.name)
        cast_fp16_to_fp32(startup_param, startup_master, startup_prog)

        if "layer_norm" in grad.name:
            # layer_norm gradients keep their original parameter, uncast
            target, update = param, grad
        else:
            # cast fp16 gradients to fp32 before apply gradients
            target, update = master, fluid.layers.cast(grad, "float32")

        if loss_scaling > 1:
            update = update / float(loss_scaling)
        pairs.append([target, update])

    main_prog._current_role = saved_role
    return pairs
89 | 
90 | 
def master_param_to_train_param(master_params_grads, params_grads, main_prog):
    """Cast each updated master parameter back into its training parameter.

    Entries are matched by position.  Training parameters whose name
    contains ``"layer_norm"`` are skipped.
    """
    for position, (master_param, master_grad) in enumerate(master_params_grads):
        train_param, _unused_grad = params_grads[position]
        if "layer_norm" in train_param.name:
            continue
        with main_prog._optimized_guard([master_param, master_grad]):
            cast_fp32_to_fp16(master_param, train_param, main_prog)
98 | 


--------------------------------------------------------------------------------
/ADE/ade/utils/save_load_io.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | """save or load model api"""
 15 | 
 16 | import os
 17 | import sys
 18 | 
 19 | import paddle
 20 | import paddle.fluid as fluid
 21 | 
 22 | 
 23 | def init_from_pretrain_model(args, exe, program):
 24 | 
 25 |     assert isinstance(args.init_from_pretrain_model, str)
 26 | 
 27 |     if not os.path.exists(args.init_from_pretrain_model):
 28 |         raise Warning("The pretrained params do not exist.")
 29 |         return False
 30 | 
 31 |     def existed_params(var):
 32 |         if not isinstance(var, fluid.framework.Parameter):
 33 |             return False
 34 |         return os.path.exists(
 35 |             os.path.join(args.init_from_pretrain_model, var.name))
 36 | 
 37 |     fluid.io.load_vars(
 38 |         exe,
 39 |         args.init_from_pretrain_model,
 40 |         main_program=program,
 41 |         predicate=existed_params)
 42 | 
 43 |     print("finish initing model from pretrained params from %s" %
 44 |           (args.init_from_pretrain_model))
 45 | 
 46 |     return True
 47 | 
 48 | 
 49 | def init_from_checkpoint(args, exe, program):
 50 | 
 51 |     assert isinstance(args.init_from_checkpoint, str)
 52 | 
 53 |     if not os.path.exists(args.init_from_checkpoint):
 54 |         raise Warning("the checkpoint path does not exist.")
 55 |         return False
 56 | 
 57 |     fluid.io.load_persistables(
 58 |         executor=exe,
 59 |         dirname=args.init_from_checkpoint,
 60 |         main_program=program,
 61 |         filename="checkpoint.pdckpt")
 62 | 
 63 |     print("finish initing model from checkpoint from %s" %
 64 |           (args.init_from_checkpoint))
 65 | 
 66 |     return True
 67 | 
 68 | 
 69 | def init_from_params(args, exe, program):
 70 | 
 71 |     assert isinstance(args.init_from_params, str)
 72 |     
 73 |     if not os.path.exists(args.init_from_params): 
 74 |         raise Warning("the params path does not exist.")
 75 |         return False
 76 | 
 77 |     fluid.io.load_params(
 78 |         executor=exe,
 79 |         dirname=args.init_from_params,
 80 |         main_program=program,
 81 |         filename="params.pdparams")
 82 | 
 83 |     print("finish init model from params from %s" % (args.init_from_params))
 84 | 
 85 |     return True
 86 | 
 87 | 
 88 | def save_checkpoint(args, exe, program, dirname):
 89 | 
 90 |     assert isinstance(args.save_model_path, str)
 91 | 
 92 |     checkpoint_dir = os.path.join(args.save_model_path, args.save_checkpoint)
 93 | 
 94 |     if not os.path.exists(checkpoint_dir):
 95 |         os.mkdir(checkpoint_dir)
 96 | 
 97 |     fluid.io.save_persistables(
 98 |         exe,
 99 |         os.path.join(checkpoint_dir, dirname),
100 |         main_program=program,
101 |         filename="checkpoint.pdckpt")
102 | 
103 |     print("save checkpoint at %s" % (os.path.join(checkpoint_dir, dirname)))
104 | 
105 |     return True
106 | 
107 | 
def save_param(args, exe, program, dirname):
    """Save the parameters of ``program``.

    They are written to ``<args.save_model_path>/<args.save_param>/<dirname>``
    as the single file ``params.pdparams``.  Always returns True.
    """
    assert isinstance(args.save_model_path, str)

    param_root = os.path.join(args.save_model_path, args.save_param)
    if not os.path.exists(param_root):
        os.makedirs(param_root)

    target_dir = os.path.join(param_root, dirname)
    fluid.io.save_params(
        exe, target_dir, main_program=program, filename="params.pdparams")
    print("save parameters at %s" % (target_dir))

    return True
125 | 
126 | 
127 | 


--------------------------------------------------------------------------------
/DGU/dgu/utils/save_load_io.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | """save or load model api"""
 15 | 
 16 | import os
 17 | import sys
 18 | 
 19 | import paddle
 20 | import paddle.fluid as fluid
 21 | 
 22 | 
 23 | def init_from_pretrain_model(args, exe, program):
 24 | 
 25 |     assert isinstance(args.init_from_pretrain_model, str)
 26 | 
 27 |     if not os.path.exists(args.init_from_pretrain_model):
 28 |         raise Warning("The pretrained params do not exist.")
 29 |         return False
 30 | 
 31 |     def existed_params(var):
 32 |         if not isinstance(var, fluid.framework.Parameter):
 33 |             return False
 34 |         return os.path.exists(
 35 |             os.path.join(args.init_from_pretrain_model, var.name))
 36 | 
 37 |     fluid.io.load_vars(
 38 |         exe,
 39 |         args.init_from_pretrain_model,
 40 |         main_program=program,
 41 |         predicate=existed_params)
 42 | 
 43 |     print("finish initing model from pretrained params from %s" %
 44 |           (args.init_from_pretrain_model))
 45 | 
 46 |     return True
 47 | 
 48 | 
 49 | def init_from_checkpoint(args, exe, program):
 50 | 
 51 |     assert isinstance(args.init_from_checkpoint, str)
 52 | 
 53 |     if not os.path.exists(args.init_from_checkpoint):
 54 |         raise Warning("the checkpoint path does not exist.")
 55 |         return False
 56 | 
 57 |     fluid.io.load_persistables(
 58 |         executor=exe,
 59 |         dirname=args.init_from_checkpoint,
 60 |         main_program=program,
 61 |         filename="checkpoint.pdckpt")
 62 | 
 63 |     print("finish initing model from checkpoint from %s" %
 64 |           (args.init_from_checkpoint))
 65 | 
 66 |     return True
 67 | 
 68 | 
 69 | def init_from_params(args, exe, program):
 70 | 
 71 |     assert isinstance(args.init_from_params, str)
 72 |     
 73 |     if not os.path.exists(args.init_from_params): 
 74 |         raise Warning("the params path does not exist.")
 75 |         return False
 76 | 
 77 |     fluid.io.load_params(
 78 |         executor=exe,
 79 |         dirname=args.init_from_params,
 80 |         main_program=program,
 81 |         filename="params.pdparams")
 82 | 
 83 |     print("finish init model from params from %s" % (args.init_from_params))
 84 | 
 85 |     return True
 86 | 
 87 | 
 88 | def save_checkpoint(args, exe, program, dirname):
 89 | 
 90 |     assert isinstance(args.save_model_path, str)
 91 | 
 92 |     checkpoint_dir = os.path.join(args.save_model_path, args.save_checkpoint)
 93 | 
 94 |     if not os.path.exists(checkpoint_dir):
 95 |         os.mkdir(checkpoint_dir)
 96 | 
 97 |     fluid.io.save_persistables(
 98 |         exe,
 99 |         os.path.join(checkpoint_dir, dirname),
100 |         main_program=program,
101 |         filename="checkpoint.pdckpt")
102 | 
103 |     print("save checkpoint at %s" % (os.path.join(checkpoint_dir, dirname)))
104 | 
105 |     return True
106 | 
107 | 
def save_param(args, exe, program, dirname):
    """Save the parameters of ``program``.

    They are written to ``<args.save_model_path>/<args.save_param>/<dirname>``
    as the single file ``params.pdparams``.  Always returns True.
    """
    assert isinstance(args.save_model_path, str)

    param_root = os.path.join(args.save_model_path, args.save_param)
    if not os.path.exists(param_root):
        os.makedirs(param_root)

    target_dir = os.path.join(param_root, dirname)
    fluid.io.save_params(
        exe, target_dir, main_program=program, filename="params.pdparams")
    print("save parameters at %s" % (target_dir))

    return True
125 | 
126 | 
127 | 


--------------------------------------------------------------------------------
/ADE/ade_net.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | """Network for auto dialogue evaluation"""
 15 | 
 16 | from __future__ import absolute_import
 17 | from __future__ import division
 18 | from __future__ import print_function
 19 | 
 20 | import paddle
 21 | import paddle.fluid as fluid
 22 | 
 23 | 
def create_net(
    is_training,
    model_input,
    args, 
    clip_value=10.0,
    word_emb_name="shared_word_emb",
    lstm_W_name="shared_lstm_W",
    lstm_bias_name="shared_lstm_bias"): 
    """Build the context/response matching network.

    Both the context and the response word sequences go through an
    embedding, an fc layer and a dynamic LSTM whose last step is kept; the
    two resulting vectors are combined by a bilinear tensor product into a
    single logit.  The context and response branches are given the same
    parameter names at every layer.

    Args:
        is_training: selects the return value (loss vs. logits).
        model_input: object exposing ``context_wordseq``,
            ``response_wordseq`` and ``labels``.
        args: configuration providing ``vocab_size``, ``emb_size``,
            ``hidden_size`` and ``loss_type``.
        clip_value: bound used to clip the 'CLS' loss to
            ``[-clip_value, clip_value]`` before averaging.
        word_emb_name: parameter name of the embedding table.
        lstm_W_name: parameter name of the LSTM weight.
        lstm_bias_name: parameter name of the LSTM bias.

    Returns:
        The mean loss when ``is_training`` is true, otherwise the logits.
        The loss ops are built in both cases.

    Raises:
        ValueError: if ``args.loss_type`` is neither 'CLS' nor 'L2'.
    """

    context_wordseq = model_input.context_wordseq
    response_wordseq = model_input.response_wordseq
    label = model_input.labels

    # Embedding lookup; both calls use the parameter name word_emb_name.
    context_emb = fluid.input.embedding(
        input=context_wordseq,
        size=[args.vocab_size, args.emb_size],
        is_sparse=True,
        param_attr=fluid.ParamAttr(
            name=word_emb_name,
            initializer=fluid.initializer.Normal(scale=0.1)))

    response_emb = fluid.input.embedding(
        input=response_wordseq,
        size=[args.vocab_size, args.emb_size],
        is_sparse=True,
        param_attr=fluid.ParamAttr(
            name=word_emb_name,
            initializer=fluid.initializer.Normal(scale=0.1)))

    # fc projects to hidden_size * 4, the size given to dynamic_lstm below.
    context_fc = fluid.layers.fc(
        input=context_emb,
        size=args.hidden_size * 4,
        param_attr=fluid.ParamAttr(name='fc_weight'),
        bias_attr=fluid.ParamAttr(name='fc_bias'))

    response_fc = fluid.layers.fc(
        input=response_emb,
        size=args.hidden_size * 4,
        param_attr=fluid.ParamAttr(name='fc_weight'),
        bias_attr=fluid.ParamAttr(name='fc_bias'))

    # LSTM over each sequence; only the last step is kept as representation.
    context_rep, _ = fluid.layers.dynamic_lstm(
        input=context_fc,
        size=args.hidden_size * 4,
        param_attr=fluid.ParamAttr(name=lstm_W_name),
        bias_attr=fluid.ParamAttr(name=lstm_bias_name))
    context_rep = fluid.layers.sequence_last_step(context_rep)

    response_rep, _ = fluid.layers.dynamic_lstm(
        input=response_fc,
        size=args.hidden_size * 4,
        param_attr=fluid.ParamAttr(name=lstm_W_name),
        bias_attr=fluid.ParamAttr(name=lstm_bias_name))
    response_rep = fluid.layers.sequence_last_step(input=response_rep)

    # Matching score: one bilinear output per context/response pair.
    logits = fluid.layers.bilinear_tensor_product(
        context_rep, response_rep, size=1)

    if args.loss_type == 'CLS': 
        # Sigmoid cross entropy, clipped element-wise, then averaged.
        label = fluid.layers.cast(x=label, dtype='float32')
        loss = fluid.layers.sigmoid_cross_entropy_with_logits(logits, label)
        loss = fluid.layers.reduce_mean(
            fluid.layers.clip(
                loss, min=-clip_value, max=clip_value))
    elif args.loss_type == 'L2':
        # Squared error between 2 * sigmoid(logits) and the label, divided
        # by 4, then averaged.
        norm_score = 2 * fluid.layers.sigmoid(logits)
        label = fluid.layers.cast(x=label, dtype='float32')
        loss = fluid.layers.square_error_cost(norm_score, label) / 4
        loss = fluid.layers.reduce_mean(loss)
    else:
        raise ValueError
    
    if is_training: 
        return loss
    else: 
        return logits
103 | 
104 | 
def set_word_embedding(word_emb, place, word_emb_name="shared_word_emb"):
    """
    Write ``word_emb`` into the tensor of the global-scope variable named
    ``word_emb_name``, on device ``place``.
    """
    embedding_var = fluid.global_scope().find_var(word_emb_name)
    embedding_var.get_tensor().set(word_emb, place)
112 | 
113 | 


--------------------------------------------------------------------------------
/ADE/predict.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.                                                                                                      
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #     http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | """predict auto dialogue evaluation task"""
 16 | import io
 17 | import os
 18 | import sys
 19 | import six
 20 | import time
 21 | import numpy as np
 22 | 
 23 | import paddle
 24 | import paddle.fluid as fluid
 25 | 
 26 | import ade.reader as reader
 27 | from ade_net import create_net
 28 | 
 29 | from ade.utils.configure import PDConfig
 30 | from ade.utils.input_field import InputField
 31 | from ade.utils.model_check import check_cuda
 32 | import ade.utils.save_load_io as save_load_io
 33 | 
 34 | 
 35 | def do_predict(args): 
 36 |     """
 37 |     predict function
 38 |     """
 39 |     test_prog = fluid.default_main_program()
 40 |     startup_prog = fluid.default_startup_program()
 41 | 
 42 |     with fluid.program_guard(test_prog, startup_prog):
 43 |         test_prog.random_seed = args.random_seed
 44 |         startup_prog.random_seed = args.random_seed
 45 | 
 46 |         with fluid.unique_name.guard():
 47 | 
 48 |             context_wordseq = fluid.data(
 49 |                     name='context_wordseq', shape=[-1, 1], dtype='int64', lod_level=1)
 50 |             response_wordseq = fluid.data(
 51 |                     name='response_wordseq', shape=[-1, 1], dtype='int64', lod_level=1)
 52 |             labels = fluid.data(
 53 |                     name='labels', shape=[-1, 1], dtype='int64')
 54 | 
 55 |             input_inst = [context_wordseq, response_wordseq, labels]
 56 |             input_field = InputField(input_inst)
 57 |             data_reader = fluid.io.PyReader(feed_list=input_inst, 
 58 |                         capacity=4, iterable=False)
 59 | 
 60 |             logits = create_net(
 61 |                     is_training=False,
 62 |                     model_input=input_field, 
 63 |                     args=args
 64 |                 )
 65 |             logits.persistable = True
 66 | 
 67 |             fetch_list = [logits.name]
 68 |     #for_test is True if change the is_test attribute of operators to True
 69 |     test_prog = test_prog.clone(for_test=True)
 70 |     if args.use_cuda: 
 71 |         place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
 72 |     else: 
 73 |         place = fluid.CPUPlace()
 74 | 
 75 |     exe = fluid.Executor(place)
 76 |     exe.run(startup_prog)
 77 | 
 78 |     assert (args.init_from_params) or (args.init_from_pretrain_model)
 79 |     if args.init_from_params:
 80 |         save_load_io.init_from_params(args, exe, test_prog)
 81 |     if args.init_from_pretrain_model:
 82 |         save_load_io.init_from_pretrain_model(args, exe, test_prog)
 83 | 
 84 |     compiled_test_prog = fluid.CompiledProgram(test_prog)
 85 | 
 86 |     processor = reader.DataProcessor(
 87 |         data_path=args.predict_file,
 88 |         max_seq_length=args.max_seq_len, 
 89 |         batch_size=args.batch_size)
 90 | 
 91 |     batch_generator = processor.data_generator(
 92 |         place=place,
 93 |         phase="test",
 94 |         shuffle=False, 
 95 |         sample_pro=1)
 96 |     num_test_examples = processor.get_num_examples(phase='test')
 97 | 
 98 |     data_reader.decorate_batch_generator(batch_generator)
 99 |     data_reader.start()
100 | 
101 |     scores = []
102 |     while True: 
103 |         try: 
104 |             results = exe.run(compiled_test_prog, fetch_list=fetch_list)
105 |             scores.extend(results[0])
106 |         except fluid.core.EOFException:
107 |             data_reader.reset()
108 |             break
109 | 
110 |     scores = scores[: num_test_examples]
111 |     print("Write the predicted results into the output_prediction_file")
112 |     fw = io.open(args.output_prediction_file, 'w', encoding="utf8")
113 |     for index, score in enumerate(scores): 
114 |         fw.write("%s\t%s\n" % (index, score))
115 |     print("finish........................................")
116 | 
117 | 
if __name__ == "__main__":

    # Load the yaml configuration, let PDConfig build itself, then print it.
    args = PDConfig(yaml_file="./data/config/ade.yaml")
    args.build()
    args.Print()

    # Check the CUDA setting before any program is built.
    check_cuda(args.use_cuda)

    do_predict(args)
127 | 


--------------------------------------------------------------------------------
/DGU/inference_model.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | """save inference model"""
 15 | 
 16 | import os
 17 | import sys
 18 | import argparse
 19 | import collections
 20 | import numpy as np
 21 | 
 22 | import paddle
 23 | import paddle.fluid as fluid
 24 | 
 25 | from dgu.utils.configure import PDConfig
 26 | from dgu.utils.input_field import InputField
 27 | from dgu.utils.model_check import check_cuda
 28 | import dgu.utils.save_load_io as save_load_io
 29 | 
 30 | import dgu.reader as reader
 31 | from dgu_net import create_net
 32 | import dgu.define_paradigm as define_paradigm 
 33 | 
 34 | 
 35 | def do_save_inference_model(args): 
 36 |     """save inference model function"""
 37 | 
 38 |     task_name = args.task_name.lower()
 39 |     paradigm_inst = define_paradigm.Paradigm(task_name)
 40 | 
 41 |     processors = {
 42 |         'udc': reader.UDCProcessor,
 43 |         'swda': reader.SWDAProcessor,
 44 |         'mrda': reader.MRDAProcessor,
 45 |         'atis_slot': reader.ATISSlotProcessor,
 46 |         'atis_intent': reader.ATISIntentProcessor,
 47 |         'dstc2': reader.DSTC2Processor,
 48 |     }
 49 | 
 50 |     test_prog = fluid.default_main_program()
 51 |     startup_prog = fluid.default_startup_program()
 52 | 
 53 |     with fluid.program_guard(test_prog, startup_prog):
 54 |         test_prog.random_seed = args.random_seed
 55 |         startup_prog.random_seed = args.random_seed
 56 | 
 57 |         with fluid.unique_name.guard():
 58 | 
 59 |             # define inputs of the network
 60 |             num_labels = len(processors[task_name].get_labels()) 
 61 | 
 62 |             src_ids = fluid.data(
 63 |                         name='src_ids', shape=[-1, args.max_seq_len], dtype='int64')
 64 |             pos_ids = fluid.data(
 65 |                         name='pos_ids', shape=[-1, args.max_seq_len], dtype='int64')
 66 |             sent_ids = fluid.data(
 67 |                         name='sent_ids', shape=[-1, args.max_seq_len], dtype='int64')
 68 |             input_mask = fluid.data(
 69 |                         name='input_mask', shape=[-1, args.max_seq_len], dtype='float32')
 70 |             if args.task_name == 'atis_slot': 
 71 |                 labels = fluid.data(
 72 |                         name='labels', shape=[-1, args.max_seq_len], dtype='int64')
 73 |             elif args.task_name in ['dstc2', 'dstc2_asr', 'multi-woz']:
 74 |                 labels = fluid.data(
 75 |                         name='labels', shape=[-1, num_labels], dtype='int64')
 76 |             else: 
 77 |                 labels = fluid.data(
 78 |                         name='labels', shape=[-1, 1], dtype='int64')
 79 |             
 80 |             input_inst = [src_ids, pos_ids, sent_ids, input_mask, labels]
 81 |             input_field = InputField(input_inst)
 82 |             
 83 |             results = create_net(
 84 |                     is_training=False, 
 85 |                     model_input=input_field, 
 86 |                     num_labels=num_labels,
 87 |                     paradigm_inst=paradigm_inst,
 88 |                     args=args)
 89 |             probs = results.get("probs", None)
 90 | 
 91 |     if args.use_cuda:
 92 |         place = fluid.CUDAPlace(0)
 93 |     else:
 94 |         place = fluid.CPUPlace()
 95 | 
 96 |     exe = fluid.Executor(place)
 97 |     exe.run(startup_prog)
 98 | 
 99 |     assert (args.init_from_params) or (args.init_from_pretrain_model)
100 |     
101 |     if args.init_from_params:
102 |         save_load_io.init_from_params(args, exe, test_prog)
103 |     elif args.init_from_pretrain_model:
104 |         save_load_io.init_from_pretrain_model(args, exe, test_prog)
105 | 
106 |     # saving inference model
107 |     fluid.io.save_inference_model(
108 |             args.inference_model_dir,
109 |             feeded_var_names=[
110 |                 input_field.src_ids.name, 
111 |                 input_field.pos_ids.name,
112 |                 input_field.sent_ids.name, 
113 |                 input_field.input_mask.name
114 |             ],
115 |             target_vars=[
116 |                 probs
117 |             ],
118 |             executor=exe,
119 |             main_program=test_prog,
120 |             model_filename="model.pdmodel",
121 |             params_filename="params.pdparams")
122 | 
123 |     print("save inference model at %s" % (args.inference_model_dir))
124 | 
125 | 
if __name__ == "__main__":

    # Load the yaml configuration and let PDConfig build itself.
    args = PDConfig(yaml_file="./data/config/dgu.yaml")
    args.build()

    # Check the CUDA setting before any program is built.
    check_cuda(args.use_cuda)

    do_save_inference_model(args)
134 | 


--------------------------------------------------------------------------------
/ADE/ade/reader.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #     http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | """Reader for auto dialogue evaluation"""
 16 | 
 17 | import io
 18 | import sys
 19 | import time
 20 | import random
 21 | import numpy as np
 22 | 
 23 | import paddle
 24 | import paddle.fluid as fluid
 25 | 
 26 | 
 27 | class DataProcessor(object): 
 28 |     def __init__(self, data_path, max_seq_length, batch_size): 
 29 |         """init"""
 30 |         self.data_file = data_path
 31 |         self.max_seq_len = max_seq_length
 32 |         self.batch_size = batch_size
 33 |         self.num_examples = {'train': -1, 'dev': -1, 'test': -1}
 34 | 
 35 |     def get_examples(self): 
 36 |         """load examples"""
 37 |         examples = []
 38 |         index = 0
 39 |         fr = io.open(self.data_file, 'r', encoding="utf8")
 40 |         for line in fr: 
 41 |             if index !=0 and index % 100 == 0: 
 42 |                 print("processing data: %d" % index)
 43 |             index += 1
 44 |             examples.append(line.strip())
 45 |         return examples
 46 | 
 47 |     def get_num_examples(self, phase): 
 48 |         """Get number of examples for train, dev or test."""
 49 |         if phase not in ['train', 'dev', 'test']: 
 50 |             raise ValueError(
 51 |                 "Unknown phase, which should be in ['train', 'dev', 'test'].")
 52 |         count = len(io.open(self.data_file, 'r', encoding="utf8").readlines())
 53 |         self.num_examples[phase] = count
 54 |         return self.num_examples[phase]
 55 | 
 56 |     def data_generator(self,
 57 |                        place,
 58 |                        phase="train",
 59 |                        shuffle=True,
 60 |                        sample_pro=1):
 61 |         """
 62 |         Generate data for train, dev or test.
 63 | 
 64 |         Args:
 65 |             phase: string. The phase for which to generate data.
 66 |             shuffle: bool. Whether to shuffle examples.
 67 |             sample_pro: sample data ratio
 68 |         """
 69 |         examples = self.get_examples()
 70 |         if shuffle: 
 71 |             np.random.shuffle(examples)
 72 |         
 73 |         def batch_reader():  
 74 |             """read batch data"""
 75 |             batch = []
 76 |             for example in examples: 
 77 |                 if sample_pro < 1:
 78 |                     if random.random() > sample_pro:
 79 |                         continue
 80 |                 tokens = example.strip().split('\t')
 81 |                 
 82 |                 if len(tokens) != 3: 
 83 |                     print("data format error: %s" % example.strip())
 84 |                     print("please input data: context \t response \t label")
 85 |                     continue
 86 | 
 87 |                 context = [int(x) for x in tokens[0].split()[: self.max_seq_len]]
 88 |                 response = [int(x) for x in tokens[1].split()[: self.max_seq_len]]
 89 |                 label = [int(tokens[2])]
 90 |                 instance = (context, response, label)
 91 | 
 92 |                 if len(batch) < self.batch_size:
 93 |                     batch.append(instance)
 94 |                 else:
 95 |                     if len(batch) == self.batch_size:
 96 |                         yield batch
 97 |                     batch = [instance]
 98 | 
 99 |             if len(batch) > 0: 
100 |                 yield batch
101 | 
102 |         def create_lodtensor(data_ids, place): 
103 |             """create LodTensor for input ids"""
104 |             cur_len = 0
105 |             lod = [cur_len]
106 |             seq_lens = [len(ids) for ids in data_ids]
107 |             for l in seq_lens: 
108 |                 cur_len += l
109 |                 lod.append(cur_len)
110 |             flattened_data = np.concatenate(data_ids, axis=0).astype("int64")
111 |             flattened_data = flattened_data.reshape([len(flattened_data), 1])
112 |             res = fluid.LoDTensor()
113 |             res.set(flattened_data, place)
114 |             res.set_lod([lod])
115 |             return res
116 | 
117 |         def wrapper(): 
118 |             """yield batch data to network""" 
119 |             for batch_data in batch_reader(): 
120 |                 context_ids = [batch[0] for batch in batch_data]
121 |                 response_ids = [batch[1] for batch in batch_data]
122 |                 label_ids = [batch[2] for batch in batch_data]
123 |                 context_res = create_lodtensor(context_ids, place)
124 |                 response_res = create_lodtensor(response_ids, place)
125 |                 label_ids = np.array(label_ids).astype("int64").reshape([-1, 1])
126 |                 input_batch = [context_res, response_res, label_ids]
127 |                 yield input_batch
128 |         
129 |         return wrapper
130 | 
131 | 


--------------------------------------------------------------------------------
/DGU/run.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | export FLAGS_sync_nccl_allreduce=0
  4 | export FLAGS_eager_delete_tensor_gb=1
  5 | 
  6 | export CUDA_VISIBLE_DEVICES=0
  7 | if  [ ! "$CUDA_VISIBLE_DEVICES" ]
  8 | then
  9 |     export CPU_NUM=1
 10 |     use_cuda=false
 11 | else
 12 |     use_cuda=true
 13 | fi
 14 | 
 15 | TASK_NAME=$1
 16 | TASK_TYPE=$2
 17 | 
 18 | BERT_BASE_PATH="./data/pretrain_model/uncased_L-12_H-768_A-12"
 19 | INPUT_PATH="./data/input/data/${TASK_NAME}"
 20 | SAVE_MODEL_PATH="./data/saved_models/${TASK_NAME}"
 21 | TRAIN_MODEL_PATH="./data/saved_models/trained_models"
 22 | OUTPUT_PATH="./data/output"
 23 | INFERENCE_MODEL="data/inference_models"
 24 | PYTHON_PATH="python"
 25 | 
 26 | if [ -f ${SAVE_MODEL_PATH} ]; then
 27 |     rm ${SAVE_MODEL_PATH}
 28 | fi
 29 | 
 30 | if [ ! -d ${SAVE_MODEL_PATH} ]; then
 31 | 	mkdir ${SAVE_MODEL_PATH}
 32 | fi
 33 | 
 34 | #parameter configuration
 35 | if [ "${TASK_NAME}" = "udc" ]
 36 | then
 37 |   save_steps=1000
 38 |   max_seq_len=210
 39 |   print_steps=1000
 40 |   batch_size=6720
 41 |   in_tokens=true
 42 |   epoch=2
 43 |   learning_rate=2e-5
 44 | elif [ "${TASK_NAME}" = "swda" ]
 45 | then
 46 |   save_steps=500
 47 |   max_seq_len=128
 48 |   print_steps=200
 49 |   batch_size=6720
 50 |   in_tokens=true
 51 |   epoch=3
 52 |   learning_rate=2e-5
 53 | elif [ "${TASK_NAME}" = "mrda" ]
 54 | then
 55 |   save_steps=500
 56 |   max_seq_len=128
 57 |   print_steps=200
 58 |   batch_size=4096
 59 |   in_tokens=true
 60 |   epoch=7
 61 |   learning_rate=2e-5
 62 | elif [ "${TASK_NAME}" = "atis_intent" ]
 63 | then
 64 |   save_steps=100
 65 |   max_seq_len=128
 66 |   print_steps=10
 67 |   batch_size=4096
 68 |   in_tokens=true
 69 |   epoch=20
 70 |   learning_rate=2e-5
 71 |   INPUT_PATH="./data/input/data/atis/${TASK_NAME}"
 72 | elif [ "${TASK_NAME}" = "atis_slot" ]
 73 | then
 74 |   save_steps=100
 75 |   max_seq_len=128
 76 |   print_steps=10
 77 |   batch_size=32
 78 |   in_tokens=False
 79 |   epoch=50
 80 |   learning_rate=2e-5
 81 |   INPUT_PATH="./data/input/data/atis/${TASK_NAME}"
 82 | elif [ "${TASK_NAME}" = "dstc2" ]
 83 | then
 84 |   save_steps=400
 85 |   print_steps=20
 86 |   batch_size=8192
 87 |   in_tokens=true
 88 |   epoch=40
 89 |   learning_rate=5e-5
 90 |   INPUT_PATH="./data/input/data/dstc2/${TASK_NAME}"
 91 |   if [ "${TASK_TYPE}" = "train" ]
 92 |   then
 93 |     max_seq_len=256
 94 |   else
 95 |     max_seq_len=512
 96 |   fi
 97 | else
 98 |   echo "not support ${TASK_NAME} dataset.."
 99 |   exit 255
100 | fi
101 | 
#training
# Runs main.py with --do_train=true for ${TASK_NAME}.
#   $1: value forwarded as --use_cuda (true|false)
# Path-valued options are quoted so directories containing spaces survive
# word splitting. $PYTHON_PATH stays unquoted so it may carry extra
# interpreter arguments.
function train()
{
    $PYTHON_PATH -u main.py \
       --task_name="${TASK_NAME}" \
       --use_cuda="$1" \
       --do_train=true \
       --in_tokens="${in_tokens}" \
       --epoch="${epoch}" \
       --batch_size="${batch_size}" \
       --do_lower_case=true \
       --data_dir="${INPUT_PATH}" \
       --bert_config_path="${BERT_BASE_PATH}/bert_config.json" \
       --vocab_path="${BERT_BASE_PATH}/vocab.txt" \
       --init_from_pretrain_model="${BERT_BASE_PATH}/params" \
       --save_model_path="${SAVE_MODEL_PATH}" \
       --save_param="params" \
       --save_steps="${save_steps}" \
       --learning_rate="${learning_rate}" \
       --weight_decay=0.01 \
       --max_seq_len="${max_seq_len}" \
       --print_steps="${print_steps}";
}
125 | 
#predicting
# Runs main.py with --do_predict=true, initialising from
# ${TRAIN_MODEL_PATH}/${TASK_NAME}/params and writing predictions to
# ${OUTPUT_PATH}/pred_${TASK_NAME}.
#   $1: value forwarded as --use_cuda (true|false)
# Path-valued options are quoted so paths containing spaces are not split.
function predict()
{
    $PYTHON_PATH -u main.py \
       --task_name="${TASK_NAME}" \
       --use_cuda="$1" \
       --do_predict=true \
       --in_tokens="${in_tokens}" \
       --batch_size="${batch_size}" \
       --data_dir="${INPUT_PATH}" \
       --do_lower_case=true \
       --init_from_params="${TRAIN_MODEL_PATH}/${TASK_NAME}/params" \
       --bert_config_path="${BERT_BASE_PATH}/bert_config.json" \
       --vocab_path="${BERT_BASE_PATH}/vocab.txt" \
       --output_prediction_file="${OUTPUT_PATH}/pred_${TASK_NAME}" \
       --max_seq_len="${max_seq_len}";
}
143 | 
#evaluating
# Runs main.py with --do_eval=True, comparing ${INPUT_PATH}/test.txt with the
# prediction file written by predict().
#   $1: value forwarded as --use_cuda (true|false)
# Path-valued options are quoted so paths containing spaces are not split.
function evaluate()
{
    $PYTHON_PATH -u main.py \
       --task_name="${TASK_NAME}" \
       --use_cuda="$1" \
       --do_eval=True \
       --evaluation_file="${INPUT_PATH}/test.txt" \
       --output_prediction_file="${OUTPUT_PATH}/pred_${TASK_NAME}";
}
154 | 
#saving the inference model
# Runs main.py with --do_save_inference_model=True, initialising from
# ${TRAIN_MODEL_PATH}/${TASK_NAME}/params and exporting to
# ${INFERENCE_MODEL}/${TASK_NAME}.
#   $1: value forwarded as --use_cuda (true|false)
# Path-valued options are quoted so paths containing spaces are not split.
function save_inference()
{
    $PYTHON_PATH -u main.py \
       --task_name="${TASK_NAME}" \
       --use_cuda="$1" \
       --init_from_params="${TRAIN_MODEL_PATH}/${TASK_NAME}/params" \
       --do_save_inference_model=True \
       --bert_config_path="${BERT_BASE_PATH}/bert_config.json" \
       --inference_model_dir="${INFERENCE_MODEL}/${TASK_NAME}";
}
166 | 
# Dispatch on the requested action (second positional argument).
case "${TASK_TYPE}" in
    train)
        echo "train $TASK_NAME start..........";
        train "$use_cuda";
        echo "train $TASK_NAME finish..........";
        ;;
    predict)
        echo "predict $TASK_NAME start..........";
        predict "$use_cuda";
        echo "predict $TASK_NAME finish..........";
        ;;
    evaluate)
        # evaluation is run with use_cuda=false, so hide the GPUs
        export CUDA_VISIBLE_DEVICES=
        echo "evaluate $TASK_NAME start..........";
        evaluate false;
        echo "evaluate $TASK_NAME finish..........";
        ;;
    inference)
        echo "save $TASK_NAME inference model start..........";
        save_inference "$use_cuda";
        echo "save $TASK_NAME inference model finish..........";
        ;;
    all)
        echo "Execute train、predict、evaluate and save inference model in sequence...."
        # Each step consumes the previous step's output, so stop at the first
        # failure instead of continuing with stale or missing artefacts.
        if train "$use_cuda" \
            && predict "$use_cuda" \
            && evaluate false \
            && save_inference "$use_cuda"
        then
            echo "done";
        else
            echo "a step of the '${TASK_NAME}' pipeline failed, aborting" >&2
            exit 1;
        fi
        ;;
    *)
        echo "Parameter $TASK_TYPE is not supported, you can input parameter in [train|predict|evaluate|inference|all]"
        exit 255;
        ;;
esac
200 |     
201 | 
202 | 


--------------------------------------------------------------------------------
/DGU/dgu/optimization.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | """Optimization and learning rate scheduling."""
 15 | 
 16 | from __future__ import absolute_import
 17 | from __future__ import division
 18 | from __future__ import print_function
 19 | 
 20 | import numpy as np
 21 | import paddle.fluid as fluid
 22 | from dgu.utils.fp16 import create_master_params_grads, master_param_to_train_param
 23 | 
 24 | 
 25 | def linear_warmup_decay(learning_rate, warmup_steps, num_train_steps):
 26 |     """ Applies linear warmup of learning rate from 0 and decay to 0."""
 27 |     with fluid.default_main_program()._lr_schedule_guard():
 28 |         lr = fluid.layers.tensor.create_global_var(
 29 |             shape=[1],
 30 |             value=0.0,
 31 |             dtype='float32',
 32 |             persistable=True,
 33 |             name="scheduled_learning_rate")
 34 | 
 35 |         global_step = fluid.layers.learning_rate_scheduler._decay_step_counter()
 36 | 
 37 |         with fluid.layers.control_flow.Switch() as switch:
 38 |             with switch.case(global_step < warmup_steps):
 39 |                 warmup_lr = learning_rate * (global_step / warmup_steps)
 40 |                 fluid.layers.tensor.assign(warmup_lr, lr)
 41 |             with switch.default():
 42 |                 decayed_lr = fluid.layers.learning_rate_scheduler.polynomial_decay(
 43 |                     learning_rate=learning_rate,
 44 |                     decay_steps=num_train_steps,
 45 |                     end_learning_rate=0.0,
 46 |                     power=1.0,
 47 |                     cycle=False)
 48 |                 fluid.layers.tensor.assign(decayed_lr, lr)
 49 | 
 50 |         return lr
 51 | 
 52 | 
 53 | def optimization(loss,
 54 |                  warmup_steps,
 55 |                  num_train_steps,
 56 |                  learning_rate,
 57 |                  train_program,
 58 |                  startup_prog,
 59 |                  weight_decay,
 60 |                  scheduler='linear_warmup_decay',
 61 |                  use_fp16=False,
 62 |                  loss_scaling=1.0):
 63 |     if warmup_steps > 0:
 64 |         if scheduler == 'noam_decay':
 65 |             scheduled_lr = fluid.layers.learning_rate_scheduler\
 66 |              .noam_decay(1/(warmup_steps *(learning_rate ** 2)),
 67 |                          warmup_steps)
 68 |         elif scheduler == 'linear_warmup_decay':
 69 |             scheduled_lr = linear_warmup_decay(learning_rate, warmup_steps,
 70 |                                                num_train_steps)
 71 |         else:
 72 |             raise ValueError("Unkown learning rate scheduler, should be "
 73 |                              "'noam_decay' or 'linear_warmup_decay'")
 74 |         optimizer = fluid.optimizer.Adam(learning_rate=scheduled_lr)
 75 |     else:
 76 |         optimizer = fluid.optimizer.Adam(learning_rate=learning_rate)
 77 |         scheduled_lr = learning_rate
 78 | 
 79 |     clip_norm_thres = 1.0
 80 |     # When using mixed precision training, scale the gradient clip threshold
 81 |     # by loss_scaling
 82 |     if use_fp16 and loss_scaling > 1.0:
 83 |         clip_norm_thres *= loss_scaling
 84 |     fluid.clip.set_gradient_clip(
 85 |         clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=clip_norm_thres))
 86 | 
 87 |     def exclude_from_weight_decay(name):
 88 |         if name.find("layer_norm") > -1:
 89 |             return True
 90 |         bias_suffix = ["_bias", "_b", ".b_0"]
 91 |         for suffix in bias_suffix:
 92 |             if name.endswith(suffix):
 93 |                 return True
 94 |         return False
 95 | 
 96 |     param_list = dict()
 97 | 
 98 |     if use_fp16:
 99 |         param_grads = optimizer.backward(loss)
100 |         master_param_grads = create_master_params_grads(
101 |             param_grads, train_program, startup_prog, loss_scaling)
102 | 
103 |         for param, _ in master_param_grads:
104 |             param_list[param.name] = param * 1.0
105 |             param_list[param.name].stop_gradient = True
106 | 
107 |         optimizer.apply_gradients(master_param_grads)
108 | 
109 |         if weight_decay > 0:
110 |             for param, grad in master_param_grads:
111 |                 if exclude_from_weight_decay(param.name.rstrip(".master")):
112 |                     continue
113 |                 with param.block.program._optimized_guard(
114 |                     [param, grad]), fluid.framework.name_scope("weight_decay"):
115 |                     updated_param = param - param_list[
116 |                         param.name] * weight_decay * scheduled_lr
117 |                     fluid.layers.assign(output=param, input=updated_param)
118 | 
119 |         master_param_to_train_param(master_param_grads, param_grads,
120 |                                     train_program)
121 | 
122 |     else:
123 |         for param in train_program.global_block().all_parameters():
124 |             param_list[param.name] = param * 1.0
125 |             param_list[param.name].stop_gradient = True
126 | 
127 |         _, param_grads = optimizer.minimize(loss)
128 | 
129 |         if weight_decay > 0:
130 |             for param, grad in param_grads:
131 |                 if exclude_from_weight_decay(param.name):
132 |                     continue
133 |                 with param.block.program._optimized_guard(
134 |                     [param, grad]), fluid.framework.name_scope("weight_decay"):
135 |                     updated_param = param - param_list[
136 |                         param.name] * weight_decay * scheduled_lr
137 |                     fluid.layers.assign(output=param, input=updated_param)
138 | 
139 |     return scheduled_lr
140 | 


--------------------------------------------------------------------------------
/DGU/dgu/scripts/build_mrda_dataset.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #     http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | """build mrda train dev test dataset"""
 16 | 
 17 | import sys
 18 | import csv
 19 | import os
 20 | import io
 21 | import re
 22 | 
 23 | import commonlib
 24 | 
 25 | 
 26 | class MRDA(object): 
 27 |     """
 28 |     dialogue act dataset mrda data process
 29 |     """
 30 |     def __init__(self): 
 31 |         """
 32 |         init instance
 33 |         """
 34 |         self.tag_id = 0
 35 |         self.map_tag_dict = dict()
 36 |         self.out_dir = "../../data/input/data/mrda"
 37 |         self.data_list = "./conf/mrda.conf"
 38 |         self.map_tag = "../../data/input/data/mrda/map_tag_id.txt"
 39 |         self.voc_map_tag = "../../data/input/data/mrda/source_data/icsi_mrda+hs_corpus_050512/classmaps/map_01b_expanded_w_split"
 40 |         self.src_dir = "../../data/input/data/mrda/source_data/icsi_mrda+hs_corpus_050512/data"
 41 |         self._load_file()
 42 |         self.tag_dict = commonlib.load_voc(self.voc_map_tag)
 43 | 
 44 |     def _load_file(self): 
 45 |         """
 46 |         load dataset filename
 47 |         """
 48 |         self.dadb_dict = {}
 49 |         self.trans_dict = {}
 50 |         self.data_dict = commonlib.load_dict(self.data_list)
 51 |         file_list, file_path = commonlib.get_file_list(self.src_dir)
 52 |         for i in range(len(file_list)): 
 53 |             name = file_list[i]
 54 |             keyword = name.split('.')[0]
 55 |             if 'dadb' in name: 
 56 |                 self.dadb_dict[keyword] = file_path[i]
 57 |             if 'trans' in name: 
 58 |                 self.trans_dict[keyword] = file_path[i]
 59 | 
 60 |     def load_dadb(self, data_type): 
 61 |         """
 62 |         load dadb dataset
 63 |         """
 64 |         dadb_dict = {}
 65 |         conv_id_list = []
 66 |         dadb_list = self.data_dict[data_type]
 67 |         for dadb_key in dadb_list: 
 68 |             dadb_file = self.dadb_dict[dadb_key]
 69 |             fr = io.open(dadb_file, 'r', encoding="utf8")
 70 |             row = csv.reader(fr, delimiter = ',')
 71 |             for line in row: 
 72 |                 elems = line
 73 |                 conv_id = elems[2]
 74 |                 conv_id_list.append(conv_id)
 75 |                 if len(elems) != 14: 
 76 |                     continue
 77 |                 error_code = elems[3]
 78 |                 da_tag = elems[-9]
 79 |                 da_ori_tag = elems[-6]
 80 |                 dadb_dict[conv_id] = (error_code, da_ori_tag, da_tag)
 81 |         return dadb_dict, conv_id_list
 82 | 
 83 |     def load_trans(self, data_type): 
 84 |         """load trans data"""
 85 |         trans_dict = {}
 86 |         trans_list = self.data_dict[data_type]
 87 |         for trans_key in trans_list: 
 88 |             trans_file = self.trans_dict[trans_key]
 89 |             fr = io.open(trans_file, 'r', encoding="utf8")
 90 |             row = csv.reader(fr, delimiter = ',')
 91 |             for line in row: 
 92 |                 elems = line
 93 |                 if len(elems) != 3: 
 94 |                     continue
 95 |                 conv_id = elems[0]
 96 |                 text = elems[1]
 97 |                 text_process = elems[2]
 98 |                 trans_dict[conv_id] = (text, text_process)
 99 |         return trans_dict
100 | 
101 |     def _parser_dataset(self, data_type): 
102 |         """
103 |         parser train dev test dataset
104 |         """
105 |         out_filename = "%s/%s.txt" % (self.out_dir, data_type)
106 |         dadb_dict, conv_id_list = self.load_dadb(data_type)
107 |         trans_dict = self.load_trans(data_type)
108 |         fw = io.open(out_filename, 'w', encoding="utf8")
109 |         for elem in conv_id_list: 
110 |             v_dadb = dadb_dict[elem]
111 |             v_trans = trans_dict[elem]
112 |             da_tag = v_dadb[2]
113 |             if da_tag not in self.tag_dict: 
114 |                 continue
115 |             tag = self.tag_dict[da_tag]
116 |             if tag == "Z": 
117 |                 continue
118 |             if tag not in self.map_tag_dict: 
119 |                 self.map_tag_dict[tag] = self.tag_id
120 |                 self.tag_id += 1
121 |             caller = elem.split('_')[0].split('-')[-1]
122 |             conv_no = elem.split('_')[0].split('-')[0]
123 |             out = "%s\t%s\t%s\t%s" % (conv_no, self.map_tag_dict[tag], caller, v_trans[0])
124 |             fw.write(u"%s\n" % out)
125 | 
126 |     def get_train_dataset(self): 
127 |         """
128 |         parser train dataset and print train.txt
129 |         """
130 |         self._parser_dataset("train")
131 | 
132 |     def get_dev_dataset(self): 
133 |         """
134 |         parser dev dataset and print dev.txt
135 |         """
136 |         self._parser_dataset("dev")
137 | 
138 |     def get_test_dataset(self): 
139 |         """
140 |         parser test dataset and print test.txt
141 |         """
142 |         self._parser_dataset("test")
143 | 
144 |     def get_labels(self): 
145 |         """
146 |         get tag and map ids file
147 |         """
148 |         fw = io.open(self.map_tag, 'w', encoding="utf8")
149 |         for elem in self.map_tag_dict: 
150 |             fw.write(u"%s\t%s\n" % (elem, self.map_tag_dict[elem]))
151 | 
152 |     def main(self): 
153 |         """
154 |         run data process
155 |         """
156 |         self.get_train_dataset()
157 |         self.get_dev_dataset()
158 |         self.get_test_dataset()
159 |         self.get_labels()
160 | 
# Script entry point: when run directly, build the MRDA instance and run its
# full pipeline (train/dev/test dataset files followed by the label map).
if __name__ == "__main__": 
    mrda_inst = MRDA()
    mrda_inst.main()
164 | 
165 | 
166 | 
167 | 
168 | 


--------------------------------------------------------------------------------
/DAM/utils/reader.py:
--------------------------------------------------------------------------------
  1 | import cPickle as pickle
  2 | import numpy as np
  3 | 
  4 | def unison_shuffle(data, seed=None):
  5 |     if seed is not None:
  6 |         np.random.seed(seed)
  7 | 
  8 |     y = np.array(data['y'])
  9 |     c = np.array(data['c'])
 10 |     r = np.array(data['r'])
 11 | 
 12 |     assert len(y) == len(c) == len(r)
 13 |     p = np.random.permutation(len(y))
 14 |     shuffle_data = {'y': y[p], 'c': c[p], 'r': r[p]}
 15 |     return shuffle_data
 16 | 
 17 | def split_c(c, split_id):
 18 |     '''c is a list, example context
 19 |        split_id is a integer, conf[_EOS_]
 20 |        return nested list
 21 |     '''
 22 |     turns = [[]]
 23 |     for _id in c:
 24 |         if _id != split_id:
 25 |             turns[-1].append(_id)
 26 |         else:
 27 |             turns.append([])
 28 |     if turns[-1] == [] and len(turns) > 1:
 29 |         turns.pop()
 30 |     return turns
 31 | 
 32 | def normalize_length(_list, length, cut_type='tail'):
 33 |     '''_list is a list or nested list, example turns/r/single turn c
 34 |        cut_type is head or tail, if _list len > length is used
 35 |        return a list len=length and min(read_length, length)
 36 |     '''
 37 |     real_length = len(_list)
 38 |     if real_length == 0:
 39 |         return [0]*length, 0
 40 | 
 41 |     if real_length <= length:
 42 |         if not isinstance(_list[0], list):
 43 |             _list.extend([0]*(length - real_length))
 44 |         else:
 45 |             _list.extend([[]]*(length - real_length))
 46 |         return _list, real_length
 47 | 
 48 |     if cut_type == 'head':
 49 |         return _list[:length], length
 50 |     if cut_type == 'tail':
 51 |         return _list[-length:], length
 52 | 
 53 | def produce_one_sample(data, index, split_id, max_turn_num, max_turn_len, turn_cut_type='tail', term_cut_type='tail'):
 54 |     '''max_turn_num=10
 55 |        max_turn_len=50
 56 |        return y, nor_turns_nor_c, nor_r, turn_len, term_len, r_len
 57 |     '''
 58 |     c = data['c'][index]
 59 |     r = data['r'][index][:]
 60 |     y = data['y'][index]
 61 | 
 62 |     turns = split_c(c, split_id)
 63 |     #normalize turns_c length, nor_turns length is max_turn_num
 64 |     nor_turns, turn_len = normalize_length(turns, max_turn_num, turn_cut_type)
 65 | 
 66 |     nor_turns_nor_c = []
 67 |     term_len = []
 68 |     #nor_turn_nor_c length is max_turn_num, element is a list length is max_turn_len
 69 |     for c in nor_turns:
 70 |         #nor_c length is max_turn_len
 71 |         nor_c, nor_c_len = normalize_length(c, max_turn_len, term_cut_type)
 72 |         nor_turns_nor_c.append(nor_c)
 73 |         term_len.append(nor_c_len)
 74 | 
 75 |     nor_r, r_len = normalize_length(r, max_turn_len, term_cut_type)
 76 | 
 77 |     return y, nor_turns_nor_c, nor_r, turn_len, term_len, r_len
 78 | 
 79 | def build_one_batch(data, batch_index, conf, turn_cut_type='tail', term_cut_type='tail'):
 80 |     _turns = []
 81 |     _tt_turns_len = []
 82 |     _every_turn_len = []
 83 | 
 84 |     _response = []
 85 |     _response_len = []
 86 | 
 87 |     _label = []
 88 | 
 89 |     for i in range(conf['batch_size']):
 90 |         index = batch_index * conf['batch_size'] + i
 91 |         y, nor_turns_nor_c, nor_r, turn_len, term_len, r_len = produce_one_sample(data, index, conf['_EOS_'], conf['max_turn_num'],
 92 |                 conf['max_turn_len'], turn_cut_type, term_cut_type)
 93 | 
 94 |         _label.append(y)
 95 |         _turns.append(nor_turns_nor_c)
 96 |         _response.append(nor_r)
 97 |         _every_turn_len.append(term_len)
 98 |         _tt_turns_len.append(turn_len)
 99 |         _response_len.append(r_len)
100 | 
101 |     return _turns, _tt_turns_len, _every_turn_len, _response, _response_len, _label
102 | 
def build_one_batch_dict(data, batch_index, conf, turn_cut_type='tail', term_cut_type='tail'):
    '''Same as build_one_batch, but the six lists are returned in a dict
       keyed by turns, tt_turns_len, every_turn_len, response,
       response_len and label.
    '''
    fields = build_one_batch(data, batch_index, conf, turn_cut_type, term_cut_type)
    keys = ('turns', 'tt_turns_len', 'every_turn_len',
            'response', 'response_len', 'label')
    return dict(zip(keys, fields))
112 |     
113 | 
def build_batches(data, conf, turn_cut_type='tail', term_cut_type='tail'):
    '''Split data into consecutive full batches of conf['batch_size'].

       Samples left over after the last full batch are dropped.
       turn_cut_type / term_cut_type are forwarded to build_one_batch and
       select which end is kept when a turn list / a turn is too long.
       return a dict whose keys turns, tt_turns_len, every_turn_len,
       response, response_len and label each hold one entry per batch
    '''
    keys = ("turns", "tt_turns_len", "every_turn_len",
            "response", "response_len", "label")
    ans = dict((key, []) for key in keys)

    batch_len = int(len(data['y'])/conf['batch_size'])
    for batch_index in range(batch_len):
        # forward the caller's cut types instead of always using 'tail'
        fields = build_one_batch(data, batch_index, conf,
                                 turn_cut_type=turn_cut_type,
                                 term_cut_type=term_cut_type)
        # build_one_batch returns its lists in the same order as `keys`
        for key, field in zip(keys, fields):
            ans[key].append(field)

    return ans
143 | 
if __name__ == '__main__':
    # Stand-alone use: pre-build the batches of the small dataset and dump them.
    conf = {
        "batch_size": 256,
        "max_turn_num": 10,
        "max_turn_len": 50,
        "_EOS_": 28270,
    }
    # NOTE: pickle can execute arbitrary code; only load trusted data files.
    with open('../../data/data_small.pkl', 'rb') as data_file:
        train, val, test = pickle.load(data_file)
    print('load data success')

    train_batches = build_batches(train, conf)
    val_batches = build_batches(val, conf)
    test_batches = build_batches(test, conf)
    print('build batches success')

    # the context manager makes sure the dump is flushed and the file closed
    with open('../../data/batches_small.pkl', 'wb') as batch_file:
        pickle.dump([train_batches, val_batches, test_batches], batch_file)
    print('dump success')
161 | 
162 | 
163 |         
164 | 
165 | 
166 |     
167 | 
168 | 
169 | 
170 | 
171 | 
172 | 
173 | 
174 | 
175 |     
176 |     
177 | 
178 | 
179 | 


--------------------------------------------------------------------------------
/DGU/dgu/scripts/build_dstc2_dataset.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #     http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | """build dstc2 train dev test dataset"""
 16 | 
 17 | import json
 18 | import sys
 19 | import csv
 20 | import os
 21 | import io
 22 | import re
 23 | 
 24 | import commonlib
 25 | 
 26 | 
 27 | class DSTC2(object): 
 28 |     """
 29 |     dialogue state tracking dstc2 data process
 30 |     """
 31 |     def __init__(self): 
 32 |         """
 33 |         init instance
 34 |         """
 35 |         self.map_tag_dict = {}
 36 |         self.out_dir = "../../data/input/data/dstc2/dstc2"
 37 |         self.out_asr_dir = "../../data/input/data/dstc2/dstc2_asr"
 38 |         self.data_list = "./conf/dstc2.conf"
 39 |         self.map_tag = "../../data/input/data/dstc2/dstc2/map_tag_id.txt"
 40 |         self.src_dir = "../../data/input/data/dstc2/source_data"
 41 |         self.onto_json = "../../data/input/data/dstc2/source_data/ontology_dstc2.json"
 42 |         self._load_file()
 43 |         self._load_ontology()
 44 | 
 45 |     def _load_file(self): 
 46 |         """
 47 |         load dataset filename
 48 |         """
 49 |         self.data_dict = commonlib.load_dict(self.data_list)
 50 |         for data_type in self.data_dict: 
 51 |             for i in range(len(self.data_dict[data_type])): 
 52 |                 self.data_dict[data_type][i] = os.path.join(self.src_dir, self.data_dict[data_type][i])
 53 | 
 54 |     def _load_ontology(self): 
 55 |         """
 56 |         load ontology tag
 57 |         """
 58 |         tag_id = 1
 59 |         self.map_tag_dict['none'] = 0
 60 |         fr = io.open(self.onto_json, 'r', encoding="utf8")
 61 |         ontology = json.load(fr)
 62 |         slots_values = ontology['informable']
 63 |         for slot in slots_values: 
 64 |             for value in slots_values[slot]: 
 65 |                 key = "%s_%s" % (slot, value)
 66 |                 self.map_tag_dict[key] = tag_id
 67 |                 tag_id += 1
 68 |             key = "%s_none" % (slot)
 69 |             self.map_tag_dict[key] = tag_id
 70 |             tag_id += 1
 71 | 
 72 |     def _parser_dataset(self, data_type): 
 73 |         """
 74 |         parser train dev test dataset
 75 |         """
 76 |         stat = os.path.exists(self.out_dir)
 77 |         if not stat: 
 78 |             os.makedirs(self.out_dir)
 79 |         asr_stat = os.path.exists(self.out_asr_dir)
 80 |         if not asr_stat: 
 81 |             os.makedirs(self.out_asr_dir)
 82 |         out_file = os.path.join(self.out_dir, "%s.txt" % data_type)
 83 |         out_asr_file = os.path.join(self.out_asr_dir, "%s.txt" % data_type)
 84 |         fw = io.open(out_file, 'w', encoding="utf8")
 85 |         fw_asr = io.open(out_asr_file, 'w', encoding="utf8")
 86 |         data_list = self.data_dict.get(data_type)
 87 |         for fn in data_list: 
 88 |             log_file = os.path.join(fn, "log.json")
 89 |             label_file = os.path.join(fn, "label.json")
 90 |             f_log = io.open(log_file, 'r', encoding="utf8")
 91 |             f_label = io.open(label_file, 'r', encoding="utf8")
 92 |             log_json = json.load(f_log)
 93 |             label_json = json.load(f_label)
 94 |             session_id = log_json['session-id']
 95 |             assert len(label_json["turns"]) == len(log_json["turns"])
 96 |             for i in range(len(label_json["turns"])): 
 97 |                 log_turn = log_json["turns"][i]
 98 |                 label_turn = label_json["turns"][i]
 99 |                 assert log_turn["turn-index"] == label_turn["turn-index"]
100 |                 labels = ["%s_%s" % (slot, label_turn["goal-labels"][slot]) for slot in label_turn["goal-labels"]]
101 |                 labels_ids = " ".join([str(self.map_tag_dict.get(label, self.map_tag_dict["%s_none" % label.split('_')[0]])) for label in labels])
102 |                 mach = log_turn['output']['transcript']
103 |                 user = label_turn['transcription']
104 |                 if not labels_ids.strip(): 
105 |                     labels_ids = self.map_tag_dict['none']
106 |                 out = "%s\t%s\1%s\t%s" % (session_id.encode('utf-8'), mach.encode('utf-8'), user.encode('utf-8'), labels_ids)
107 |                 user_asr = log_turn['input']['live']['asr-hyps'][0]['asr-hyp'].strip()
108 |                 out_asr = "%s\t%s\1%s\t%s" % (session_id.encode('utf-8'), mach.encode('utf-8'), user_asr.encode('utf-8'), labels_ids)
109 |                 fw.write(u"%s\n" % out)
110 |                 fw_asr.write(u"%s\n" % out_asr)
111 | 
112 |     def get_train_dataset(self): 
113 |         """
114 |         parser train dataset and print train.txt
115 |         """
116 |         self._parser_dataset("train")
117 | 
118 |     def get_dev_dataset(self): 
119 |         """
120 |         parser dev dataset and print dev.txt
121 |         """
122 |         self._parser_dataset("dev")
123 | 
124 |     def get_test_dataset(self): 
125 |         """
126 |         parser test dataset and print test.txt
127 |         """
128 |         self._parser_dataset("test")
129 | 
130 |     def get_labels(self): 
131 |         """
132 |         get tag and map ids file
133 |         """
134 |         fw = io.open(self.map_tag, 'w', encoding="utf8")
135 |         for elem in self.map_tag_dict: 
136 |             fw.write(u"%s\t%s\n" % (elem, self.map_tag_dict[elem]))
137 | 
138 |     def main(self): 
139 |         """
140 |         run data process
141 |         """
142 |         self.get_train_dataset()
143 |         self.get_dev_dataset()
144 |         self.get_test_dataset()
145 |         self.get_labels()
146 | 
# Script entry point: when run directly, build the DSTC2 instance (its
# constructor loads the file list and the ontology) and run the full pipeline.
if __name__ == "__main__": 
    dstc_inst = DSTC2()
    dstc_inst.main()
150 | 
151 | 
152 | 
153 | 
154 | 


--------------------------------------------------------------------------------
/DAM/bin/train_and_evaluate.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import os
  3 | import time
  4 | 
  5 | import cPickle as pickle
  6 | import tensorflow as tf
  7 | import numpy as np
  8 | 
  9 | import utils.reader as reader
 10 | import utils.evaluation as eva
 11 | 
 12 | 
 13 | def train(conf, _model):
 14 |     
 15 |     if conf['rand_seed'] is not None:
 16 |         np.random.seed(conf['rand_seed'])
 17 | 
 18 |     if not os.path.exists(conf['save_path']):
 19 |         os.makedirs(conf['save_path'])
 20 | 
 21 |     # load data
 22 |     print('starting loading data')
 23 |     print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
 24 |     train_data, val_data, test_data = pickle.load(open(conf["data_path"], 'rb'))    
 25 |     print('finish loading data')
 26 |     print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
 27 | 
 28 |     val_batches = reader.build_batches(val_data, conf)
 29 | 
 30 |     print("finish building test batches")
 31 |     print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
 32 | 
 33 |     # refine conf
 34 |     batch_num = int(len(train_data['y']) / conf["batch_size"])
 35 |     val_batch_num = len(val_batches["response"])
 36 | 
 37 |     conf["train_steps"] = conf["num_scan_data"] * batch_num
 38 |     conf["save_step"] = int(max(1, batch_num / 10))
 39 |     conf["print_step"] = int(max(1, batch_num / 100))
 40 | 
 41 |     print('configurations: %s' %conf)
 42 | 
 43 |     print('model sucess')
 44 |     print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
 45 | 
 46 |     _graph = _model.build_graph()
 47 |     print('build graph sucess')
 48 |     print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
 49 | 
 50 |     with tf.Session(graph=_graph) as sess:
 51 |         _model.init.run();
 52 |         if conf["init_model"]:
 53 |             _model.saver.restore(sess, conf["init_model"])
 54 |             print("sucess init %s" %conf["init_model"])
 55 | 
 56 |         average_loss = 0.0
 57 |         batch_index = 0
 58 |         step = 0
 59 |         best_result = [0, 0, 0, 0]
 60 | 
 61 |         for step_i in xrange(conf["num_scan_data"]):
 62 |             #for batch_index in rng.permutation(range(batch_num)):
 63 |             print('starting shuffle train data')
 64 |             shuffle_train = reader.unison_shuffle(train_data)
 65 |             train_batches = reader.build_batches(shuffle_train, conf)
 66 |             print('finish building train data')
 67 |             for batch_index in range(batch_num):
 68 | 
 69 |                 feed = {
 70 |                     _model.turns: train_batches["turns"][batch_index], 
 71 |                     _model.tt_turns_len: train_batches["tt_turns_len"][batch_index],
 72 |                     _model.every_turn_len: train_batches["every_turn_len"][batch_index],
 73 |                     _model.response: train_batches["response"][batch_index], 
 74 |                     _model.response_len: train_batches["response_len"][batch_index],
 75 |                     _model.label: train_batches["label"][batch_index]
 76 |                 }
 77 | 
 78 |                 batch_index = (batch_index + 1) % batch_num;
 79 | 
 80 |                 _, curr_loss = sess.run([_model.g_updates, _model.loss], feed_dict = feed)
 81 | 
 82 |                 
 83 |                 average_loss += curr_loss
 84 | 
 85 |                 step += 1
 86 | 
 87 |                 if step % conf["print_step"] == 0 and step > 0:
 88 |                     g_step, lr = sess.run([_model.global_step, _model.learning_rate])
 89 |                     print('step: %s, lr: %s' %(g_step, lr))
 90 |                     print("processed: [" + str(step * 1.0 / batch_num) + "] loss: [" + str(average_loss / conf["print_step"]) + "]" )
 91 |                     average_loss = 0
 92 | 
 93 |                 
 94 |                 if step % conf["save_step"] == 0 and step > 0:
 95 |                     index = step / conf['save_step']
 96 |                     score_file_path = conf['save_path'] + 'score.' + str(index)
 97 |                     score_file = open(score_file_path, 'w')
 98 |                     print('save step: %s' %index)
 99 |                     print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
100 | 
101 |                     for batch_index in xrange(val_batch_num):
102 |                 
103 |                         feed = { 
104 |                             _model.turns: val_batches["turns"][batch_index],
105 |                             _model.tt_turns_len: val_batches["tt_turns_len"][batch_index],
106 |                             _model.every_turn_len: val_batches["every_turn_len"][batch_index],
107 |                             _model.response: val_batches["response"][batch_index],
108 |                             _model.response_len: val_batches["response_len"][batch_index],
109 |                             _model.label: val_batches["label"][batch_index]
110 |                         }   
111 |                 
112 |                         scores = sess.run(_model.logits, feed_dict = feed)
113 |                     
114 |                         for i in xrange(conf["batch_size"]):
115 |                             score_file.write(
116 |                                 str(scores[i]) + '\t' + 
117 |                                 str(val_batches["label"][batch_index][i]) + '\n')
118 |                     score_file.close()
119 | 
120 |                     #write evaluation result
121 |                     result = eva.evaluate(score_file_path)
122 |                     result_file_path = conf["save_path"] + "result." + str(index)
123 |                     with open(result_file_path, 'w') as out_file:
124 |                         for p_at in result:
125 |                             out_file.write(str(p_at) + '\n')
126 |                     print('finish evaluation')
127 |                     print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
128 | 
129 |                     if result[1] + result[2] > best_result[1] + best_result[2]:
130 |                         best_result = result
131 |                         _save_path = _model.saver.save(sess, conf["save_path"] + "model.ckpt." + str(step / conf["save_step"]))
132 |                         print("succ saving model in " + _save_path)
133 |                         print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
134 |                     
135 |                 
136 | 
137 | 


--------------------------------------------------------------------------------
/DGU/dgu/scripts/build_atis_dataset.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #     http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | """build atis train test dataset"""
 17 | 
 18 | import json
 19 | import sys
 20 | import csv
 21 | import os
 22 | import io
 23 | import re
 24 | 
 25 | 
 26 | class ATIS(object): 
 27 |     """
 28 |     nlu dataset atis data process
 29 |     """
 30 |     def __init__(self): 
 31 |         """
 32 |         init instance
 33 |         """
 34 |         self.slot_id = 2
 35 |         self.slot_dict = {"PAD": 0, "O": 1}
 36 |         self.intent_id = 0
 37 |         self.intent_dict = dict()
 38 |         self.src_dir = "../../data/input/data/atis/source_data"
 39 |         self.out_slot_dir = "../../data/input/data/atis/atis_slot"
 40 |         self.out_intent_dir = "../../data/input/data/atis/atis_intent"
 41 |         self.map_tag_slot = "../../data/input/data/atis/atis_slot/map_tag_slot_id.txt"
 42 |         self.map_tag_intent = "../../data/input/data/atis/atis_intent/map_tag_intent_id.txt"
 43 | 
 44 |     def _load_file(self, data_type): 
 45 |         """
 46 |         load dataset filename
 47 |         """
 48 |         slot_stat = os.path.exists(self.out_slot_dir)
 49 |         if not slot_stat: 
 50 |             os.makedirs(self.out_slot_dir)
 51 |         intent_stat = os.path.exists(self.out_intent_dir)
 52 |         if not intent_stat: 
 53 |             os.makedirs(self.out_intent_dir)
 54 |         src_examples = []
 55 |         json_file = os.path.join(self.src_dir, "%s.json" % data_type)
 56 |         load_f = io.open(json_file, 'r', encoding="utf8")
 57 |         json_dict = json.load(load_f)
 58 |         examples = json_dict['rasa_nlu_data']['common_examples']
 59 |         for example in examples: 
 60 |             text = example.get('text')
 61 |             intent = example.get('intent')
 62 |             entities = example.get('entities')
 63 |             src_examples.append((text, intent, entities))
 64 |         return src_examples
 65 | 
 66 |     def _parser_intent_data(self, examples, data_type): 
 67 |         """
 68 |         parser intent dataset
 69 |         """
 70 |         out_filename = "%s/%s.txt" % (self.out_intent_dir, data_type)
 71 |         fw = io.open(out_filename, 'w', encoding="utf8")
 72 |         for example in examples: 
 73 |             if example[1] not in self.intent_dict: 
 74 |                 self.intent_dict[example[1]] = self.intent_id
 75 |                 self.intent_id += 1
 76 |             fw.write(u"%s\t%s\n" % (self.intent_dict[example[1]], example[0].lower()))
 77 | 
 78 |         fw = io.open(self.map_tag_intent, 'w', encoding="utf8")
 79 |         for tag in self.intent_dict: 
 80 |             fw.write(u"%s\t%s\n" % (tag, self.intent_dict[tag]))
 81 | 
 82 |     def _parser_slot_data(self, examples, data_type): 
 83 |         """
 84 |         parser slot dataset
 85 |         """
 86 |         out_filename = "%s/%s.txt" % (self.out_slot_dir, data_type)
 87 |         fw = io.open(out_filename, 'w', encoding="utf8")
 88 |         for example in examples: 
 89 |             tags = []
 90 |             text = example[0]
 91 |             entities = example[2]
 92 |             if not entities: 
 93 |                 tags = [str(self.slot_dict['O'])] * len(text.strip().split())
 94 |                 continue
 95 |             for i in range(len(entities)): 
 96 |                 enty = entities[i]
 97 |                 start = enty['start']
 98 |                 value_num = len(enty['value'].split())
 99 |                 tags_slot = []
100 |                 for j in range(value_num): 
101 |                     if j == 0: 
102 |                         bround_tag = "B"
103 |                     else: 
104 |                         bround_tag = "I"
105 |                     tag = "%s-%s" % (bround_tag, enty['entity'])
106 |                     if tag not in self.slot_dict: 
107 |                         self.slot_dict[tag] = self.slot_id
108 |                         self.slot_id += 1
109 |                     tags_slot.append(str(self.slot_dict[tag]))
110 |                 if i == 0: 
111 |                     if start not in [0, 1]: 
112 |                         prefix_num = len(text[: start].strip().split())
113 |                         tags.extend([str(self.slot_dict['O'])] * prefix_num)
114 |                     tags.extend(tags_slot)
115 |                 else: 
116 |                     prefix_num = len(text[entities[i - 1]['end']: start].strip().split())
117 |                     tags.extend([str(self.slot_dict['O'])] * prefix_num)
118 |                     tags.extend(tags_slot)
119 |             if entities[-1]['end'] < len(text): 
120 |                 suffix_num = len(text[entities[-1]['end']:].strip().split())
121 |                 tags.extend([str(self.slot_dict['O'])] * suffix_num)
122 |             fw.write(u"%s\t%s\n" % (text.encode('utf8'), " ".join(tags).encode('utf8')))
123 |         
124 |         fw = io.open(self.map_tag_slot, 'w', encoding="utf8")
125 |         for slot in self.slot_dict: 
126 |             fw.write(u"%s\t%s\n" % (slot, self.slot_dict[slot]))
127 | 
128 |     def get_train_dataset(self): 
129 |         """
130 |         parser train dataset and print train.txt
131 |         """
132 |         train_examples = self._load_file("train")
133 |         self._parser_intent_data(train_examples, "train")
134 |         self._parser_slot_data(train_examples, "train")
135 | 
136 |     def get_test_dataset(self): 
137 |         """
138 |         parser test dataset and print test.txt
139 |         """
140 |         test_examples = self._load_file("test")
141 |         self._parser_intent_data(test_examples, "test")
142 |         self._parser_slot_data(test_examples, "test")
143 | 
144 |     def main(self): 
145 |         """
146 |         run data process
147 |         """
148 |         self.get_train_dataset()
149 |         self.get_test_dataset()
150 | 
151 | 
# Script entry point: when run directly, build the ATIS instance and convert
# the train and test splits.
if __name__ == "__main__": 
    atis_inst = ATIS()
    atis_inst.main()
155 | 
156 | 
157 | 
158 | 
159 | 


--------------------------------------------------------------------------------
/DGU/dgu/define_paradigm.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | """define network paradigm"""
 15 | 
 16 | import sys
 17 | import re
 18 | 
 19 | import paddle
 20 | import paddle.fluid as fluid
 21 | 
 22 | 
 23 | class Paradigm(object):
 24 |     """
 25 |     define network paradigm
 26 |     """
 27 | 
    def __init__(self, task_name):
        """
        init

        Args:
            task_name: task identifier; stored unchanged on the instance as
                ``self.task_name``.
        """
        self.task_name = task_name
 33 | 
 34 |     def create_cls(self, transformer_inst, params):
 35 |         """
 36 |         create classify paradigm network
 37 |         """
 38 |         cls_feats = transformer_inst.get_pooled_output()
 39 |         cls_feats = fluid.layers.dropout(
 40 |             x=cls_feats,
 41 |             dropout_prob=0.1,
 42 |             dropout_implementation="upscale_in_train")
 43 |         logits = fluid.layers.fc(
 44 |             input=cls_feats,
 45 |             size=params['num_labels'],
 46 |             param_attr=fluid.ParamAttr(
 47 |                 name="cls_out_w",
 48 |                 initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
 49 |             bias_attr=fluid.ParamAttr(
 50 |                 name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
 51 | 
 52 |         if not params['is_training']:
 53 |             probs = fluid.layers.softmax(logits)
 54 |             results = {"probs": probs}
 55 |             return results
 56 | 
 57 |         ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
 58 |             logits=logits, label=params['labels'], return_softmax=True)
 59 |         loss = fluid.layers.mean(x=ce_loss)
 60 |         num_seqs = fluid.layers.create_tensor(dtype='int64')
 61 |         accuracy = fluid.layers.accuracy(
 62 |             input=probs, label=params['labels'], total=num_seqs)
 63 | 
 64 |         results = {
 65 |             "loss": loss,
 66 |             "probs": probs,
 67 |             "accuracy": accuracy,
 68 |             "num_seqs": num_seqs
 69 |         }
 70 |         return results
 71 | 
 72 |     def create_multi_cls(self, transformer_inst, params):
 73 |         """
 74 |         create multi classify paradigm network
 75 |         """
 76 |         cls_feats = transformer_inst.get_pooled_output()
 77 |         cls_feats = fluid.layers.dropout(
 78 |             x=cls_feats,
 79 |             dropout_prob=0.1,
 80 |             dropout_implementation="upscale_in_train")
 81 |         logits = fluid.layers.fc(
 82 |             input=cls_feats,
 83 |             size=params['num_labels'],
 84 |             param_attr=fluid.ParamAttr(
 85 |                 name="cls_out_w",
 86 |                 initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
 87 |             bias_attr=fluid.ParamAttr(
 88 |                 name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
 89 | 
 90 |         labels_onehot = fluid.layers.cast(params["labels"], dtype='float32')
 91 |         ce_loss = fluid.layers.reduce_sum(
 92 |             fluid.layers.sigmoid_cross_entropy_with_logits(
 93 |                 x=logits, label=labels_onehot))
 94 |         loss = fluid.layers.mean(x=ce_loss)
 95 |         probs = fluid.layers.sigmoid(logits)
 96 | 
 97 |         if not params['is_training']:
 98 |             results = {"probs": probs}
 99 |             return results
100 | 
101 |         num_seqs = fluid.layers.tensor.fill_constant(
102 |             shape=[1], dtype='int64', value=1)
103 | 
104 |         results = {"loss": loss, "probs": probs, "num_seqs": num_seqs}
105 |         return results
106 | 
107 |     def create_sequence_tagging(self, transformer_inst, params):
108 |         """
109 |         create sequence tagging paradigm
110 |         """
111 |         output_layer = transformer_inst.get_sequence_output()
112 |         hidden_size = output_layer.shape[-1]
113 |         output_layer = fluid.layers.stack(output_layer, axis=1)
114 |         output_layer = fluid.layers.reshape(output_layer, [-1, hidden_size])
115 | 
116 |         logits = fluid.layers.fc(input=output_layer, size=params['num_labels'])
117 |         probs = fluid.layers.cast(
118 |             fluid.layers.argmax(
119 |                 logits, axis=1), dtype='int32')
120 | 
121 |         if not params['is_training']:
122 |             results = {"probs": probs}
123 |             return results
124 | 
125 |         num_seqs = fluid.layers.tensor.fill_constant(
126 |             shape=[1], dtype='int64', value=1)
127 |         y_label_reshape = fluid.layers.cast(
128 |             fluid.layers.reshape(params['labels'], [-1]), dtype='int32')
129 |         correct_prediction = fluid.layers.equal(probs, y_label_reshape)
130 |         accuracy = fluid.layers.mean(
131 |             fluid.layers.cast(
132 |                 correct_prediction, dtype='float32'))
133 |         ce_loss = fluid.layers.softmax_with_cross_entropy(logits=logits, \
134 |                 label=fluid.layers.reshape(params['labels'], [-1, 1]))
135 |         loss = fluid.layers.mean(x=ce_loss)
136 | 
137 |         results = {
138 |             "loss": loss,
139 |             "probs": probs,
140 |             "accuracy": accuracy,
141 |             "num_seqs": num_seqs
142 |         }
143 |         return results
144 | 
145 |     def paradigm(self, transformer_inst, params):
146 |         """
147 |         run paradigm
148 |         """
149 |         results = None
150 |         if self.task_name == 'udc':
151 |             results = self.create_cls(transformer_inst, params)
152 |         elif self.task_name == 'swda':
153 |             results = self.create_cls(transformer_inst, params)
154 |         elif self.task_name == 'mrda':
155 |             results = self.create_cls(transformer_inst, params)
156 |         elif self.task_name == 'atis_intent':
157 |             results = self.create_cls(transformer_inst, params)
158 |         elif self.task_name == 'atis_slot':
159 |             results = self.create_sequence_tagging(transformer_inst, params)
160 |         elif self.task_name == 'dstc2':
161 |             results = self.create_multi_cls(transformer_inst, params)
162 |         return results
163 | 


--------------------------------------------------------------------------------
/DGU/predict.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #     http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | import io
 17 | import os
 18 | import sys
 19 | import numpy as np
 20 | import argparse
 21 | import collections
 22 | import paddle
 23 | import paddle.fluid as fluid
 24 | 
 25 | import dgu.reader as reader
 26 | from dgu_net import create_net
 27 | import dgu.define_paradigm as define_paradigm
 28 | import dgu.define_predict_pack as define_predict_pack
 29 | 
 30 | from dgu.utils.configure import PDConfig
 31 | from dgu.utils.input_field import InputField
 32 | from dgu.utils.model_check import check_cuda
 33 | import dgu.utils.save_load_io as save_load_io
 34 | from dgu.utils.py23 import tab_tok, rt_tok
 35 | 
 36 | 
 37 | def do_predict(args):
 38 |     """predict function"""
 39 | 
 40 |     task_name = args.task_name.lower()
 41 |     paradigm_inst = define_paradigm.Paradigm(task_name)
 42 |     pred_inst = define_predict_pack.DefinePredict()
 43 |     pred_func = getattr(pred_inst, pred_inst.task_map[task_name])
 44 | 
 45 |     processors = {
 46 |         'udc': reader.UDCProcessor,
 47 |         'swda': reader.SWDAProcessor,
 48 |         'mrda': reader.MRDAProcessor,
 49 |         'atis_slot': reader.ATISSlotProcessor,
 50 |         'atis_intent': reader.ATISIntentProcessor,
 51 |         'dstc2': reader.DSTC2Processor,
 52 |     }
 53 | 
 54 |     test_prog = fluid.default_main_program()
 55 |     startup_prog = fluid.default_startup_program()
 56 | 
 57 |     with fluid.program_guard(test_prog, startup_prog):
 58 |         test_prog.random_seed = args.random_seed
 59 |         startup_prog.random_seed = args.random_seed
 60 | 
 61 |         with fluid.unique_name.guard():
 62 | 
 63 |             # define inputs of the network
 64 |             num_labels = len(processors[task_name].get_labels())
 65 | 
 66 |             src_ids = fluid.data(
 67 |                 name='src_ids', shape=[-1, args.max_seq_len], dtype='int64')
 68 |             pos_ids = fluid.data(
 69 |                 name='pos_ids', shape=[-1, args.max_seq_len], dtype='int64')
 70 |             sent_ids = fluid.data(
 71 |                 name='sent_ids', shape=[-1, args.max_seq_len], dtype='int64')
 72 |             input_mask = fluid.data(
 73 |                 name='input_mask',
 74 |                 shape=[-1, args.max_seq_len],
 75 |                 dtype='float32')
 76 |             if args.task_name == 'atis_slot':
 77 |                 labels = fluid.data(
 78 |                     name='labels', shape=[-1, args.max_seq_len], dtype='int64')
 79 |             elif args.task_name in ['dstc2', 'dstc2_asr', 'multi-woz']:
 80 |                 labels = fluid.data(
 81 |                     name='labels', shape=[-1, num_labels], dtype='int64')
 82 |             else:
 83 |                 labels = fluid.data(name='labels', shape=[-1, 1], dtype='int64')
 84 | 
 85 |             input_inst = [src_ids, pos_ids, sent_ids, input_mask, labels]
 86 |             input_field = InputField(input_inst)
 87 |             data_reader = fluid.io.PyReader(
 88 |                 feed_list=input_inst, capacity=4, iterable=False)
 89 | 
 90 |             results = create_net(
 91 |                 is_training=False,
 92 |                 model_input=input_field,
 93 |                 num_labels=num_labels,
 94 |                 paradigm_inst=paradigm_inst,
 95 |                 args=args)
 96 | 
 97 |             probs = results.get("probs", None)
 98 | 
 99 |             probs.persistable = True
100 | 
101 |             fetch_list = [probs.name]
102 | 
103 |     #for_test is True if change the is_test attribute of operators to True
104 |     test_prog = test_prog.clone(for_test=True)
105 | 
106 |     if args.use_cuda:
107 |         place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
108 |     else:
109 |         place = fluid.CPUPlace()
110 | 
111 |     exe = fluid.Executor(place)
112 |     exe.run(startup_prog)
113 | 
114 |     assert (args.init_from_params) or (args.init_from_pretrain_model)
115 | 
116 |     if args.init_from_params:
117 |         save_load_io.init_from_params(args, exe, test_prog)
118 |     if args.init_from_pretrain_model:
119 |         save_load_io.init_from_pretrain_model(args, exe, test_prog)
120 | 
121 |     compiled_test_prog = fluid.CompiledProgram(test_prog)
122 | 
123 |     processor = processors[task_name](data_dir=args.data_dir,
124 |                                       vocab_path=args.vocab_path,
125 |                                       max_seq_len=args.max_seq_len,
126 |                                       do_lower_case=args.do_lower_case,
127 |                                       in_tokens=args.in_tokens,
128 |                                       task_name=task_name,
129 |                                       random_seed=args.random_seed)
130 |     batch_generator = processor.data_generator(
131 |         batch_size=args.batch_size, phase='test', shuffle=False)
132 | 
133 |     data_reader.decorate_batch_generator(batch_generator)
134 |     data_reader.start()
135 | 
136 |     all_results = []
137 |     while True:
138 |         try:
139 |             results = exe.run(compiled_test_prog, fetch_list=fetch_list)
140 |             all_results.extend(results[0])
141 |         except fluid.core.EOFException:
142 |             data_reader.reset()
143 |             break
144 | 
145 |     np.set_printoptions(precision=4, suppress=True)
146 |     print("Write the predicted results into the output_prediction_file")
147 | 
148 |     fw = io.open(args.output_prediction_file, 'w', encoding="utf8")
149 |     if task_name not in ['atis_slot']:
150 |         for index, result in enumerate(all_results):
151 |             tags = pred_func(result)
152 |             fw.write("%s%s%s%s" % (index, tab_tok, tags, rt_tok))
153 |     else:
154 |         tags = pred_func(all_results, args.max_seq_len)
155 |         for index, tag in enumerate(tags):
156 |             fw.write("%s%s%s%s" % (index, tab_tok, tag, rt_tok))
157 | 
158 | 
if __name__ == "__main__":
    # Script entry point: build the configuration from the DGU YAML file,
    # print it, then run prediction with it.
    config = PDConfig(yaml_file="./data/config/dgu.yaml")
    config.build()
    config.Print()

    # check_cuda is given the use_cuda setting before prediction starts
    check_cuda(config.use_cuda)
    do_predict(config)
168 | 


--------------------------------------------------------------------------------
/ADE/run.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | export FLAGS_sync_nccl_allreduce=0
  4 | export FLAGS_eager_delete_tensor_gb=1.0
  5 | 
  6 | export CUDA_VISIBLE_DEVICES=0
  7 | 
  8 | if [ $# -ne 2 ]
  9 | then
 10 |   echo "please input parameters: TRAIN_TYPE and TASK_TYPE"
 11 |   echo "TRAIN_TYPE: [matching|seq2seq_naive|seq2seq_att|keywords|human]"
 12 |   echo "TASK_TYPE: [train|predict|evaluate|inference]"
 13 |   exit 255
 14 | fi
 15 | 
 16 | TRAIN_TYPE=$1
 17 | TASK_TYPE=$2
 18 | 
 19 | candi_train_type=("matching" "seq2seq_naive" "seq2seq_att" "keywords" "human")
 20 | candi_task_type=("train" "predict" "evaluate" "inference")
 21 | 
 22 | if [[ ! "${candi_train_type[@]}" =~ ${TRAIN_TYPE} ]] 
 23 | then
 24 |   echo "unknown parameter: ${TRAIN_TYPE}, just support [matching|seq2seq_naive|seq2seq_att|keywords|human]"
 25 |   exit 255
 26 | fi
 27 | 
 28 | if [[ ! "${candi_task_type[@]}" =~ ${TASK_TYPE} ]] 
 29 | then
 30 |   echo "unknown parameter: ${TRAIN_TYPE}, just support [train|predict|evaluate|inference]"
 31 |   exit 255
 32 | fi
 33 | 
 34 | INPUT_PATH="data/input/data"
 35 | OUTPUT_PATH="data/output"
 36 | SAVED_MODELS="data/saved_models"
 37 | INFERENCE_MODEL="data/inference_models"
 38 | PYTHON_PATH="python"
 39 | 
 40 | #train pretrain model
 41 | if  [ ! "$CUDA_VISIBLE_DEVICES" ]
 42 | then
 43 |   export CPU_NUM=1
 44 |   use_cuda=false
 45 | else
 46 |   use_cuda=true
 47 | fi
 48 | 
 49 | #training
 50 | function pretrain_train()
 51 | {
 52 | 
 53 |     pretrain_model_path="${SAVED_MODELS}/matching_pretrained"
 54 |     if [ -f ${pretrain_model_path} ]
 55 |     then
 56 |         rm ${pretrain_model_path}
 57 |     fi
 58 | 
 59 |     if [ ! -d ${pretrain_model_path} ]
 60 |     then
 61 |         mkdir ${pretrain_model_path}
 62 |     fi
 63 | 
 64 |     ${PYTHON_PATH} -u main.py \
 65 |       --do_train=true \
 66 |       --use_cuda=${1} \
 67 |       --loss_type="CLS" \
 68 |       --max_seq_len=50 \
 69 |       --save_model_path=${pretrain_model_path} \
 70 |       --save_param="params" \
 71 |       --training_file="${INPUT_PATH}/unlabel_data/train.ids" \
 72 |       --epoch=20 \
 73 |       --print_step=1 \
 74 |       --save_step=400 \
 75 |       --batch_size=256 \
 76 |       --hidden_size=256 \
 77 |       --emb_size=256 \
 78 |       --vocab_size=484016 \
 79 |       --learning_rate=0.001 \
 80 |       --sample_pro=0.1 
 81 | }
 82 | 
 83 | function finetuning_train()
 84 | {
 85 |     save_model_path="${SAVED_MODELS}/${2}_finetuned"
 86 | 
 87 |     if [ -f ${save_model_path} ]
 88 |     then
 89 |         rm ${save_model_path}
 90 |     fi
 91 | 
 92 |     if [ ! -d ${save_model_path} ]
 93 |     then
 94 |         mkdir ${save_model_path}
 95 |     fi
 96 | 
 97 |     ${PYTHON_PATH} -u main.py \
 98 |       --do_train=true \
 99 |       --use_cuda=${1} \
100 |       --loss_type="L2" \
101 |       --max_seq_len=50 \
102 |       --init_from_pretrain_model="${SAVED_MODELS}/matching_pretrained/params/step_final" \
103 |       --save_model_path=${save_model_path} \
104 |       --save_param="params" \
105 |       --training_file="${INPUT_PATH}/label_data/${2}/train.ids" \
106 |       --epoch=50 \
107 |       --print_step=1 \
108 |       --save_step=400 \
109 |       --batch_size=256 \
110 |       --hidden_size=256 \
111 |       --emb_size=256 \
112 |       --vocab_size=484016 \
113 |       --learning_rate=0.001 \
114 |       --sample_pro=0.1
115 | }
116 | 
117 | #predict
# Predict on the unlabelled test set with the trained matching model.
#   $1: value forwarded to --use_cuda
function pretrain_predict()
{
    local cuda_flag=${1}
    local cli_args=(
        --do_predict=true
        --use_cuda=${cuda_flag}
        --predict_file="${INPUT_PATH}/unlabel_data/test.ids"
        --init_from_params="${SAVED_MODELS}/trained_models/matching_pretrained/params"
        --loss_type="CLS"
        --output_prediction_file="${OUTPUT_PATH}/pretrain_matching_predict"
        --max_seq_len=50
        --batch_size=256
        --hidden_size=256
        --emb_size=256
        --vocab_size=484016
    )

    ${PYTHON_PATH} -u main.py "${cli_args[@]}"
}
133 | 
# Predict on the labelled test set with a fine-tuned model.
#   $1: value forwarded to --use_cuda
#   $2: train type; selects the test file, the model directory and the output name
function finetuning_predict()
{
    local cuda_flag=${1}
    local cli_args=(
        --do_predict=true
        --use_cuda=${cuda_flag}
        --predict_file="${INPUT_PATH}/label_data/${2}/test.ids"
        --init_from_params=${SAVED_MODELS}/trained_models/${2}_finetuned/params
        --loss_type="L2"
        --output_prediction_file="${OUTPUT_PATH}/finetuning_${2}_predict"
        --max_seq_len=50
        --batch_size=256
        --hidden_size=256
        --emb_size=256
        --vocab_size=484016
    )

    ${PYTHON_PATH} -u main.py "${cli_args[@]}"
}
149 | 
150 | #evaluate
# Evaluate the matching predictions against the unlabelled test set.
#   $1: value forwarded to --use_cuda
function pretrain_eval()
{
    local cuda_flag=${1}
    local cli_args=(
        --do_eval=true
        --use_cuda=${cuda_flag}
        --evaluation_file="${INPUT_PATH}/unlabel_data/test.ids"
        --output_prediction_file="${OUTPUT_PATH}/pretrain_matching_predict"
        --loss_type="CLS"
    )

    ${PYTHON_PATH} -u main.py "${cli_args[@]}"
}
160 | 
# Evaluate the fine-tuned predictions against the labelled test set.
#   $1: value forwarded to --use_cuda
#   $2: train type; selects the test file and the prediction file
function finetuning_eval()
{
    local cuda_flag=${1}
    local cli_args=(
        --do_eval=true
        --use_cuda=${cuda_flag}
        --evaluation_file="${INPUT_PATH}/label_data/${2}/test.ids"
        --output_prediction_file="${OUTPUT_PATH}/finetuning_${2}_predict"
        --loss_type="L2"
    )

    ${PYTHON_PATH} -u main.py "${cli_args[@]}"
}
170 | 
171 | #inference model
# Export the trained matching model as an inference model.
#   $1: value forwarded to --use_cuda
function pretrain_infer()
{
    local cuda_flag=${1}
    local cli_args=(
        --do_save_inference_model=true
        --use_cuda=${cuda_flag}
        --init_from_params="${SAVED_MODELS}/trained_models/matching_pretrained/params"
        --inference_model_dir="${INFERENCE_MODEL}/matching_inference_model"
    )

    ${PYTHON_PATH} -u main.py "${cli_args[@]}"
}
# Export a fine-tuned model as an inference model.
#   $1: value forwarded to --use_cuda
#   $2: train type; selects the model directory and the export directory
function finetuning_infer()
{
    local cuda_flag=${1}
    local cli_args=(
        --do_save_inference_model=true
        --use_cuda=${cuda_flag}
        --init_from_params="${SAVED_MODELS}/trained_models/${2}_finetuned/params"
        --inference_model_dir="${INFERENCE_MODEL}/${2}_inference_model"
    )

    ${PYTHON_PATH} -u main.py "${cli_args[@]}"
}
189 | 
# Dispatch on TASK_TYPE; within each task, "matching" uses the pretrain_*
# function and every other TRAIN_TYPE uses the finetuning_* counterpart.
case "${TASK_TYPE}" in
    train)
        echo "train ${TRAIN_TYPE} start.........."
        if [ "${TRAIN_TYPE}" = "matching" ]; then
            pretrain_train ${use_cuda}
        else
            finetuning_train ${use_cuda} ${TRAIN_TYPE}
        fi
        ;;
    predict)
        echo "predict ${TRAIN_TYPE} start.........."
        if [ "${TRAIN_TYPE}" = "matching" ]; then
            pretrain_predict ${use_cuda}
        else
            finetuning_predict ${use_cuda} ${TRAIN_TYPE}
        fi
        ;;
    evaluate)
        echo "evaluate ${TRAIN_TYPE} start.........."
        if [ "${TRAIN_TYPE}" = "matching" ]; then
            pretrain_eval ${use_cuda}
        else
            finetuning_eval ${use_cuda} ${TRAIN_TYPE}
        fi
        ;;
    inference)
        echo "save ${TRAIN_TYPE} inference model start.........."
        if [ "${TRAIN_TYPE}" = "matching" ]; then
            pretrain_infer ${use_cuda}
        else
            finetuning_infer ${use_cuda} ${TRAIN_TYPE}
        fi
        ;;
    *)
        # any other task type is rejected
        exit 255
        ;;
esac
229 | 
230 | 


--------------------------------------------------------------------------------
/DAM/models/self_match_net.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import numpy as np
  3 | import cPickle as pickle
  4 | 
  5 | import utils.layers as layers
  6 | import utils.operations as op
  7 | 
  8 | class Net(object):
  9 |     '''Add positional encoding(initializer lambda is 0),
 10 |        cross-attention, cnn integrated and grad clip by value.
 11 | 
 12 |     Attributes:
 13 |         conf: a configuration paramaters dict
 14 |         word_embedding_init: a 2-d array with shape [vocab_size+1, emb_size]
 15 |     '''
 16 |     def __init__(self, conf):
 17 |         self._graph = tf.Graph()
 18 |         self._conf = conf
 19 | 
 20 |         if self._conf['word_emb_init'] is not None:
 21 |             print('loading word emb init')
 22 |             self._word_embedding_init = pickle.load(open(self._conf['word_emb_init'], 'rb'))
 23 |         else:
 24 |             self._word_embedding_init = None
 25 | 
 26 |     def build_graph(self):
 27 |         with self._graph.as_default():
 28 |             rand_seed = self._conf['rand_seed']
 29 |             tf.set_random_seed(rand_seed)
 30 | 
 31 |             #word embedding
 32 |             if self._word_embedding_init is not None:
 33 |                 word_embedding_initializer = tf.constant_initializer(self._word_embedding_init)
 34 |             else:
 35 |                 word_embedding_initializer = tf.random_normal_initializer(stddev=0.1)
 36 | 
 37 |             self._word_embedding = tf.get_variable(
 38 |                 name='word_embedding',
 39 |                 shape=[self._conf['vocab_size']+1, self._conf['emb_size']],
 40 |                 dtype=tf.float32,
 41 |                 initializer=word_embedding_initializer)
 42 | 
 43 | 
 44 |             #define placehloders
 45 |             self.turns = tf.placeholder(
 46 |                 tf.int32,
 47 |                 shape=[self._conf["batch_size"], self._conf["max_turn_num"], self._conf["max_turn_len"]])
 48 | 
 49 |             self.tt_turns_len = tf.placeholder(
 50 |                 tf.int32,
 51 |                 shape=[self._conf["batch_size"]])
 52 | 
 53 |             self.every_turn_len = tf.placeholder(
 54 |                 tf.int32,
 55 |                 shape=[self._conf["batch_size"], self._conf["max_turn_num"]])
 56 |     
 57 |             self.response = tf.placeholder(
 58 |                 tf.int32, 
 59 |                 shape=[self._conf["batch_size"], self._conf["max_turn_len"]])
 60 | 
 61 |             self.response_len = tf.placeholder(
 62 |                 tf.int32, 
 63 |                 shape=[self._conf["batch_size"]])
 64 | 
 65 |             self.label = tf.placeholder(
 66 |                 tf.float32, 
 67 |                 shape=[self._conf["batch_size"]])
 68 | 
 69 | 
 70 |             #define operations
 71 |             #response part
 72 |             Hr = tf.nn.embedding_lookup(self._word_embedding, self.response)
 73 |             #Hr_stack = [Hr]
 74 | 
 75 |             if self._conf['is_positional'] and self._conf['stack_num'] > 0:
 76 |                 with tf.variable_scope('positional'):
 77 |                     Hr = op.positional_encoding_vector(Hr, max_timescale=10)
 78 |             Hr_stack = [Hr]
 79 | 
 80 |             for index in range(self._conf['stack_num']):
 81 |                 with tf.variable_scope('self_stack_' + str(index)):
 82 |                     Hr = layers.block(
 83 |                         Hr, Hr, Hr, 
 84 |                         Q_lengths=self.response_len, K_lengths=self.response_len)
 85 |                     Hr_stack.append(Hr)
 86 | 
 87 |             Hr_stack = tf.stack(Hr_stack, axis=-1)
 88 | 
 89 | 
 90 |             #context part
 91 |             #a list of length max_turn_num, every element is a tensor with shape [batch, max_turn_len]
 92 |             list_turn_t = tf.unstack(self.turns, axis=1) 
 93 |             list_turn_length = tf.unstack(self.every_turn_len, axis=1)
 94 |             
 95 |             sim_turns = []
 96 |             #for every turn_t calculate matching vector
 97 |             for turn_t, t_turn_length in zip(list_turn_t, list_turn_length):
 98 |                 Hu = tf.nn.embedding_lookup(self._word_embedding, turn_t) #[batch, max_turn_len, emb_size]
 99 |                 #Hu_stack = [Hu]
100 | 
101 |                 if self._conf['is_positional'] and self._conf['stack_num'] > 0:
102 |                     with tf.variable_scope('positional', reuse=True):
103 |                         Hu = op.positional_encoding_vector(Hu, max_timescale=10)
104 |                 Hu_stack = [Hu]
105 | 
106 | 
107 |                 for index in range(self._conf['stack_num']):
108 | 
109 |                     with tf.variable_scope('self_stack_' + str(index), reuse=True):
110 |                         Hu = layers.block(
111 |                             Hu, Hu, Hu,
112 |                             Q_lengths=t_turn_length, K_lengths=t_turn_length)
113 | 
114 |                         Hu_stack.append(Hu)
115 | 
116 | 
117 |                 Hu_stack = tf.stack(Hu_stack, axis=-1)
118 |                 #print('Hu_stack shape: %s' %Hu_stack.shape)
119 |                 
120 |                 #calculate similarity matrix
121 |                 with tf.variable_scope('similarity'):
122 |                     # sim shape [batch, max_turn_len, max_turn_len, 2*stack_num+1]
123 |                     # divide sqrt(200) to prevent gradient explosion
124 |                     sim = tf.einsum('biks,bjks->bijs', Hu_stack, Hr_stack) / tf.sqrt(200.0)
125 | 
126 |                 sim_turns.append(sim)
127 | 
128 | 
129 |             #cnn and aggregation
130 |             sim = tf.stack(sim_turns, axis=1)
131 |             print('sim shape: %s' %sim.shape)
132 |             with tf.variable_scope('cnn_aggregation'):
133 |                 final_info = layers.CNN_3d(sim, 32, 16)
134 |                 #for douban
135 |                 #final_info = layers.CNN_3d(sim, 16, 16)
136 | 
137 | 
138 |             #loss and train
139 |             with tf.variable_scope('loss'):
140 |                 self.loss, self.logits = layers.loss(final_info, self.label)
141 | 
142 |                 self.global_step = tf.Variable(0, trainable=False)
143 |                 initial_learning_rate = self._conf['learning_rate']
144 |                 self.learning_rate = tf.train.exponential_decay(
145 |                     initial_learning_rate,
146 |                     global_step=self.global_step,
147 |                     decay_steps=400,
148 |                     decay_rate=0.9,
149 |                     staircase=True)
150 | 
151 |                 Optimizer = tf.train.AdamOptimizer(self.learning_rate)
152 |                 self.optimizer = Optimizer.minimize(self.loss)
153 | 
154 |                 self.init = tf.global_variables_initializer()
155 |                 self.saver = tf.train.Saver(max_to_keep = self._conf["max_to_keep"])
156 |                 self.all_variables = tf.global_variables() 
157 |                 self.all_operations = self._graph.get_operations()
158 |                 self.grads_and_vars = Optimizer.compute_gradients(self.loss)
159 | 
160 |                 for grad, var in self.grads_and_vars:
161 |                     if grad is None:
162 |                         print var
163 | 
164 |                 self.capped_gvs = [(tf.clip_by_value(grad, -1, 1), var) for grad, var in self.grads_and_vars]
165 |                 self.g_updates = Optimizer.apply_gradients(
166 |                     self.capped_gvs,
167 |                     global_step=self.global_step)
168 |     
169 |         return self._graph
170 | 
171 | 


--------------------------------------------------------------------------------
/ADE/train.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.                                                                                                      
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #     http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | """train auto dialogue evaluation task"""
 16 | import io
 17 | import os
 18 | import sys
 19 | import six
 20 | import time
 21 | import numpy as np
 22 | 
 23 | import paddle
 24 | import paddle.fluid as fluid
 25 | 
 26 | import ade.reader as reader
 27 | from ade_net import create_net, set_word_embedding
 28 | 
 29 | from ade.utils.configure import PDConfig
 30 | from ade.utils.input_field import InputField
 31 | from ade.utils.model_check import check_cuda
 32 | import ade.utils.save_load_io as save_load_io
 33 | 
 34 | try: 
 35 |     import cPickle as pickle  #python 2
 36 | except ImportError as e:
 37 |     import pickle  #python 3
 38 | 
 39 | 
 40 | def do_train(args):
 41 |     """train function"""
 42 | 
 43 |     train_prog = fluid.default_main_program()
 44 |     startup_prog = fluid.default_startup_program()
 45 | 
 46 |     with fluid.program_guard(train_prog, startup_prog):
 47 |         train_prog.random_seed = args.random_seed
 48 |         startup_prog.random_seed = args.random_seed
 49 | 
 50 |         with fluid.unique_name.guard(): 
 51 |             context_wordseq = fluid.data(
 52 |                     name='context_wordseq', shape=[-1, 1], dtype='int64', lod_level=1)
 53 |             response_wordseq = fluid.data(
 54 |                     name='response_wordseq', shape=[-1, 1], dtype='int64', lod_level=1)
 55 |             labels = fluid.data(
 56 |                     name='labels', shape=[-1, 1], dtype='int64')
 57 | 
 58 |             input_inst = [context_wordseq, response_wordseq, labels]
 59 |             input_field = InputField(input_inst)
 60 |             data_reader = fluid.io.PyReader(feed_list=input_inst, 
 61 |                         capacity=4, iterable=False)
 62 | 
 63 |             loss = create_net(
 64 |                     is_training=True,
 65 |                     model_input=input_field, 
 66 |                     args=args
 67 |                 )
 68 |             loss.persistable = True
 69 |             # gradient clipping
 70 |             fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByValue(
 71 |                 max=1.0, min=-1.0))
 72 |             optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
 73 |             optimizer.minimize(loss)
 74 | 
 75 |             if args.use_cuda:
 76 |                 dev_count = fluid.core.get_cuda_device_count()
 77 |                 place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
 78 |             else: 
 79 |                 dev_count = int(os.environ.get('CPU_NUM', 1))
 80 |                 place = fluid.CPUPlace()
 81 | 
 82 |             processor = reader.DataProcessor(
 83 |                 data_path=args.training_file,
 84 |                 max_seq_length=args.max_seq_len, 
 85 |                 batch_size=args.batch_size)
 86 | 
 87 |             batch_generator = processor.data_generator(
 88 |                 place=place,
 89 |                 phase="train",
 90 |                 shuffle=True, 
 91 |                 sample_pro=args.sample_pro)
 92 | 
 93 |             num_train_examples = processor.get_num_examples(phase='train')
 94 |             max_train_steps = args.epoch * num_train_examples // dev_count // args.batch_size
 95 | 
 96 |             print("Num train examples: %d" % num_train_examples)
 97 |             print("Max train steps: %d" % max_train_steps)
 98 | 
 99 |     data_reader.decorate_batch_generator(batch_generator)
100 | 
101 |     exe = fluid.Executor(place)
102 |     exe.run(startup_prog)
103 | 
104 |     assert (args.init_from_checkpoint == "") or (
105 |         args.init_from_pretrain_model == "")
106 | 
107 |     #init from some checkpoint, to resume the previous training
108 |     if args.init_from_checkpoint: 
109 |         save_load_io.init_from_checkpoint(args, exe, train_prog)
110 |     #init from some pretrain models, to better solve the current task
111 |     if args.init_from_pretrain_model: 
112 |         save_load_io.init_from_pretrain_model(args, exe, train_prog)
113 | 
114 |     if args.word_emb_init:
115 |         print("start loading word embedding init ...")
116 |         if six.PY2:
117 |             word_emb = np.array(pickle.load(io.open(args.word_emb_init, 'rb'))).astype('float32')
118 |         else:
119 |             word_emb = np.array(pickle.load(io.open(args.word_emb_init, 'rb'), encoding="bytes")).astype('float32')
120 |         set_word_embedding(word_emb, place)
121 |         print("finish init word embedding  ...")
122 | 
123 |     build_strategy = fluid.compiler.BuildStrategy()
124 |     build_strategy.enable_inplace = True
125 | 
126 |     compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
127 |                 loss_name=loss.name, build_strategy=build_strategy)
128 | 
129 |     steps = 0
130 |     begin_time = time.time()
131 |     time_begin =  time.time()
132 | 
133 |     for epoch_step in range(args.epoch): 
134 |         data_reader.start()
135 |         sum_loss = 0.0
136 |         ce_loss = 0.0
137 |         while True:
138 |             try: 
139 |                 fetch_list = [loss.name]
140 |                 outputs = exe.run(compiled_train_prog, fetch_list=fetch_list)
141 |                 np_loss = outputs
142 |                 sum_loss += np.array(np_loss).mean()
143 |                 ce_loss = np.array(np_loss).mean()
144 | 
145 |                 if steps % args.print_steps == 0: 
146 |                     time_end = time.time()
147 |                     used_time = time_end - time_begin
148 |                     current_time = time.strftime('%Y-%m-%d %H:%M:%S',
149 |                                                 time.localtime(time.time()))
150 |                     print('%s epoch: %d, step: %s, avg loss %s, speed: %f steps/s' % (current_time, epoch_step, steps, sum_loss / args.print_steps, args.print_steps / used_time))
151 |                     sum_loss = 0.0
152 |                     time_begin = time.time()
153 | 
154 |                 if steps % args.save_steps == 0: 
155 |                     if args.save_checkpoint:
156 |                         save_load_io.save_checkpoint(args, exe, train_prog, "step_" + str(steps))
157 |                     if args.save_param: 
158 |                         save_load_io.save_param(args, exe, train_prog, "step_" + str(steps))
159 |                 steps += 1
160 |             except fluid.core.EOFException:  
161 |                 data_reader.reset()
162 |                 break
163 |     
164 |     if args.save_checkpoint: 
165 |         save_load_io.save_checkpoint(args, exe, train_prog, "step_final")
166 |     if args.save_param: 
167 |         save_load_io.save_param(args, exe, train_prog, "step_final")
168 | 
169 |     def get_cards(): 
170 |         num = 0
171 |         cards = os.environ.get('CUDA_VISIBLE_DEVICES', '')
172 |         if cards != '': 
173 |             num = len(cards.split(","))
174 |         return num
175 | 
176 |     if args.enable_ce: 
177 |         card_num = get_cards()
178 |         pass_time_cost = time.time() - begin_time
179 |         print("test_card_num", card_num)
180 |         print("kpis\ttrain_duration_card%s\t%s" % (card_num, pass_time_cost))
181 |         print("kpis\ttrain_loss_card%s\t%f" % (card_num, ce_loss))
182 |         
183 | 
# Script entry point: only runs when this file is executed directly.
if __name__ == '__main__':
    
    # Build the run configuration from the project YAML file, then echo it.
    # NOTE(review): presumably build() also merges command-line overrides --
    # confirm against PDConfig.
    args = PDConfig(yaml_file="./data/config/ade.yaml")
    args.build()
    args.Print()

    # Validate the use_cuda setting before any training work starts.
    check_cuda(args.use_cuda)
    
    do_train(args)
193 | 


--------------------------------------------------------------------------------
/DAM/models/last_net.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import numpy as np
  3 | import cPickle as pickle
  4 | 
  5 | import utils.layers as layers
  6 | import utils.operations as op
  7 | 
  8 | class Net(object):
  9 |     '''Add positional encoding(initializer lambda is 0),
 10 |        cross-attention, cnn integrated and grad clip by value.
 11 | 
 12 |     Attributes:
 13 |         conf: a configuration paramaters dict
 14 |         word_embedding_init: a 2-d array with shape [vocab_size+1, emb_size]
 15 |     '''
 16 |     def __init__(self, conf):
 17 |         self._graph = tf.Graph()
 18 |         self._conf = conf
 19 | 
 20 |         if self._conf['word_emb_init'] is not None:
 21 |             print('loading word emb init')
 22 |             self._word_embedding_init = pickle.load(open(self._conf['word_emb_init'], 'rb'))
 23 |         else:
 24 |             self._word_embedding_init = None
 25 | 
 26 |     def build_graph(self):
 27 |         with self._graph.as_default():
 28 |             rand_seed = self._conf['rand_seed']
 29 |             tf.set_random_seed(rand_seed)
 30 | 
 31 |             #word embedding
 32 |             if self._word_embedding_init is not None:
 33 |                 word_embedding_initializer = tf.constant_initializer(self._word_embedding_init)
 34 |             else:
 35 |                 word_embedding_initializer = tf.random_normal_initializer(stddev=0.1)
 36 | 
 37 |             self._word_embedding = tf.get_variable(
 38 |                 name='word_embedding',
 39 |                 shape=[self._conf['vocab_size']+1, self._conf['emb_size']],
 40 |                 dtype=tf.float32,
 41 |                 initializer=word_embedding_initializer)
 42 | 
 43 | 
 44 |             #define placehloders
 45 |             self.turns = tf.placeholder(
 46 |                 tf.int32,
 47 |                 shape=[self._conf["batch_size"], self._conf["max_turn_num"], self._conf["max_turn_len"]])
 48 | 
 49 |             self.tt_turns_len = tf.placeholder(
 50 |                 tf.int32,
 51 |                 shape=[self._conf["batch_size"]])
 52 | 
 53 |             self.every_turn_len = tf.placeholder(
 54 |                 tf.int32,
 55 |                 shape=[self._conf["batch_size"], self._conf["max_turn_num"]])
 56 |     
 57 |             self.response = tf.placeholder(
 58 |                 tf.int32, 
 59 |                 shape=[self._conf["batch_size"], self._conf["max_turn_len"]])
 60 | 
 61 |             self.response_len = tf.placeholder(
 62 |                 tf.int32, 
 63 |                 shape=[self._conf["batch_size"]])
 64 | 
 65 |             self.label = tf.placeholder(
 66 |                 tf.float32, 
 67 |                 shape=[self._conf["batch_size"]])
 68 | 
 69 | 
 70 |             #define operations
 71 |             #response part
 72 |             Hr = tf.nn.embedding_lookup(self._word_embedding, self.response)
 73 | 
 74 |             if self._conf['is_positional'] and self._conf['stack_num'] > 0:
 75 |                 with tf.variable_scope('positional'):
 76 |                     Hr = op.positional_encoding_vector(Hr, max_timescale=10)
 77 | 
 78 |             for index in range(self._conf['stack_num']):
 79 |                 with tf.variable_scope('self_stack_' + str(index)):
 80 |                     Hr = layers.block(
 81 |                         Hr, Hr, Hr, 
 82 |                         Q_lengths=self.response_len, K_lengths=self.response_len)
 83 | 
 84 |             #context part
 85 |             #a list of length max_turn_num, every element is a tensor with shape [batch, max_turn_len]
 86 |             list_turn_t = tf.unstack(self.turns, axis=1) 
 87 |             list_turn_length = tf.unstack(self.every_turn_len, axis=1)
 88 |             
 89 |             sim_turns = []
 90 |             #for every turn_t calculate matching vector
 91 |             for turn_t, t_turn_length in zip(list_turn_t, list_turn_length):
 92 |                 Hu = tf.nn.embedding_lookup(self._word_embedding, turn_t) #[batch, max_turn_len, emb_size]
 93 | 
 94 |                 if self._conf['is_positional'] and self._conf['stack_num'] > 0:
 95 |                     with tf.variable_scope('positional', reuse=True):
 96 |                         Hu = op.positional_encoding_vector(Hu, max_timescale=10)
 97 | 
 98 |                 for index in range(self._conf['stack_num']):
 99 | 
100 |                     with tf.variable_scope('self_stack_' + str(index), reuse=True):
101 |                         Hu = layers.block(
102 |                             Hu, Hu, Hu,
103 |                             Q_lengths=t_turn_length, K_lengths=t_turn_length)
104 | 
105 | 
106 |                     
107 |                 with tf.variable_scope('u_attentd_r_' + str(index)):
108 |                     try:
109 |                         u_a_r = layers.block(
110 |                             Hu, Hr, Hr,
111 |                             Q_lengths=t_turn_length, K_lengths=self.response_len)
112 |                     except ValueError:
113 |                         tf.get_variable_scope().reuse_variables()
114 |                         u_a_r = layers.block(
115 |                             Hu, Hr, Hr,
116 |                             Q_lengths=t_turn_length, K_lengths=self.response_len)
117 |                             
118 | 
119 |                 with tf.variable_scope('r_attend_u_' + str(index)):
120 |                     try:
121 |                         r_a_u = layers.block(
122 |                             Hr, Hu, Hu,
123 |                             Q_lengths=self.response_len, K_lengths=t_turn_length)
124 |                     except ValueError:
125 |                         tf.get_variable_scope().reuse_variables()
126 |                         r_a_u = layers.block(
127 |                             Hr, Hu, Hu,
128 |                             Q_lengths=self.response_len, K_lengths=t_turn_length)
129 | 
130 |                 u_a_r = tf.stack([u_a_r, Hu], axis=-1)
131 |                 r_a_u = tf.stack([r_a_u, Hr], axis=-1)
132 |                 
133 |                 #calculate similarity matrix
134 |                 with tf.variable_scope('similarity'):
135 |                     # sim shape [batch, max_turn_len, max_turn_len, 2*stack_num+1]
136 |                     # divide sqrt(200) to prevent gradient explosion
137 |                     sim = tf.einsum('biks,bjks->bijs', r_a_u, u_a_r) / tf.sqrt(200.0)
138 | 
139 |                 sim_turns.append(sim)
140 | 
141 | 
142 |             #cnn and aggregation
143 |             sim = tf.stack(sim_turns, axis=1)
144 |             print('sim shape: %s' %sim.shape)
145 |             with tf.variable_scope('cnn_aggregation'):
146 |                 final_info = layers.CNN_3d(sim, 32, 16)
147 |                 #for douban
148 |                 #final_info = layers.CNN_3d(sim, 16, 16)
149 | 
150 |             #loss and train
151 |             with tf.variable_scope('loss'):
152 |                 self.loss, self.logits = layers.loss(final_info, self.label)
153 | 
154 |                 self.global_step = tf.Variable(0, trainable=False)
155 |                 initial_learning_rate = self._conf['learning_rate']
156 |                 self.learning_rate = tf.train.exponential_decay(
157 |                     initial_learning_rate,
158 |                     global_step=self.global_step,
159 |                     decay_steps=400,
160 |                     decay_rate=0.9,
161 |                     staircase=True)
162 | 
163 |                 Optimizer = tf.train.AdamOptimizer(self.learning_rate)
164 |                 self.optimizer = Optimizer.minimize(self.loss)
165 | 
166 |                 self.init = tf.global_variables_initializer()
167 |                 self.saver = tf.train.Saver(max_to_keep = self._conf["max_to_keep"])
168 |                 self.all_variables = tf.global_variables() 
169 |                 self.all_operations = self._graph.get_operations()
170 |                 self.grads_and_vars = Optimizer.compute_gradients(self.loss)
171 | 
172 |                 for grad, var in self.grads_and_vars:
173 |                     if grad is None:
174 |                         print var
175 | 
176 |                 self.capped_gvs = [(tf.clip_by_value(grad, -1, 1), var) for grad, var in self.grads_and_vars]
177 |                 self.g_updates = Optimizer.apply_gradients(
178 |                     self.capped_gvs,
179 |                     global_step=self.global_step)
180 |     
181 |         return self._graph
182 | 
183 | 


--------------------------------------------------------------------------------
/DGU/dgu/batching.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | """Mask, padding and batching."""
 15 | 
 16 | from __future__ import absolute_import
 17 | from __future__ import division
 18 | from __future__ import print_function
 19 | 
 20 | import numpy as np
 21 | 
 22 | 
 23 | def mask(batch_tokens, total_token_num, vocab_size, CLS=1, SEP=2, MASK=3):
 24 |     """
 25 |     Add mask for batch_tokens, return out, mask_label, mask_pos;
 26 |     Note: mask_pos responding the batch_tokens after padded;
 27 |     """
 28 |     max_len = max([len(sent) for sent in batch_tokens])
 29 |     mask_label = []
 30 |     mask_pos = []
 31 |     prob_mask = np.random.rand(total_token_num)
 32 |     # Note: the first token is [CLS], so [low=1]
 33 |     replace_ids = np.random.randint(1, high=vocab_size, size=total_token_num)
 34 |     pre_sent_len = 0
 35 |     prob_index = 0
 36 |     for sent_index, sent in enumerate(batch_tokens):
 37 |         mask_flag = False
 38 |         prob_index += pre_sent_len
 39 |         for token_index, token in enumerate(sent):
 40 |             prob = prob_mask[prob_index + token_index]
 41 |             if prob > 0.15:
 42 |                 continue
 43 |             elif 0.03 < prob <= 0.15:
 44 |                 # mask
 45 |                 if token != SEP and token != CLS:
 46 |                     mask_label.append(sent[token_index])
 47 |                     sent[token_index] = MASK
 48 |                     mask_flag = True
 49 |                     mask_pos.append(sent_index * max_len + token_index)
 50 |             elif 0.015 < prob <= 0.03:
 51 |                 # random replace
 52 |                 if token != SEP and token != CLS:
 53 |                     mask_label.append(sent[token_index])
 54 |                     sent[token_index] = replace_ids[prob_index + token_index]
 55 |                     mask_flag = True
 56 |                     mask_pos.append(sent_index * max_len + token_index)
 57 |             else:
 58 |                 # keep the original token
 59 |                 if token != SEP and token != CLS:
 60 |                     mask_label.append(sent[token_index])
 61 |                     mask_pos.append(sent_index * max_len + token_index)
 62 |         pre_sent_len = len(sent)
 63 | 
 64 |         # ensure at least mask one word in a sentence
 65 |         while not mask_flag:
 66 |             token_index = int(np.random.randint(1, high=len(sent) - 1, size=1))
 67 |             if sent[token_index] != SEP and sent[token_index] != CLS:
 68 |                 mask_label.append(sent[token_index])
 69 |                 sent[token_index] = MASK
 70 |                 mask_flag = True
 71 |                 mask_pos.append(sent_index * max_len + token_index)
 72 |     mask_label = np.array(mask_label).astype("int64").reshape([-1, 1])
 73 |     mask_pos = np.array(mask_pos).astype("int64").reshape([-1, 1])
 74 |     return batch_tokens, mask_label, mask_pos
 75 | 
 76 | 
 77 | def prepare_batch_data(task_name,
 78 |                        insts, 
 79 |                        max_len, 
 80 |                        total_token_num,
 81 |                        voc_size=0,
 82 |                        pad_id=None,
 83 |                        cls_id=None,
 84 |                        sep_id=None,
 85 |                        mask_id=None,
 86 |                        return_input_mask=True,
 87 |                        return_max_len=True,
 88 |                        return_num_token=False):
 89 |     """
 90 |     1. generate Tensor of data
 91 |     2. generate Tensor of position
 92 |     3. generate self attention mask, [shape: batch_size *  max_len * max_len]
 93 |     """
 94 |     batch_src_ids = [inst[0] for inst in insts]
 95 |     batch_sent_ids = [inst[1] for inst in insts]
 96 |     batch_pos_ids = [inst[2] for inst in insts]
 97 |     labels_list = []
 98 |     # compatible with squad, whose example includes start/end positions, 
 99 |     # or unique id
100 | 
101 |     if isinstance(insts[0][3], list): 
102 |         if task_name == "atis_slot": 
103 |             labels_list = [inst[3] + [0] * (max_len - len(inst[3])) for inst in insts]
104 |             labels_list = [np.array(labels_list).astype("int64").reshape([-1, max_len])]
105 |         elif task_name == "dstc2": 
106 |             labels_list = [inst[3] for inst in insts]
107 |             labels_list = [np.array(labels_list).astype("int64")]
108 |     else: 
109 |         for i in range(3, len(insts[0]), 1):
110 |             labels = [inst[i] for inst in insts]
111 |             labels = np.array(labels).astype("int64").reshape([-1, 1])
112 |             labels_list.append(labels)
113 | 
114 |     # First step: do mask without padding
115 |     if mask_id >= 0:
116 |         out, mask_label, mask_pos = mask(
117 |             batch_src_ids,
118 |             total_token_num,
119 |             vocab_size=voc_size,
120 |             CLS=cls_id,
121 |             SEP=sep_id,
122 |             MASK=mask_id)
123 |     else:
124 |         out = batch_src_ids
125 |     # Second step: padding
126 |     src_id, self_input_mask = pad_batch_data(
127 |         out, 
128 |         max_len, 
129 |         pad_idx=pad_id, 
130 |         return_input_mask=True)
131 |     pos_id = pad_batch_data(
132 |         batch_pos_ids, 
133 |         max_len, 
134 |         pad_idx=pad_id, 
135 |         return_pos=False, 
136 |         return_input_mask=False)
137 |     sent_id = pad_batch_data(
138 |         batch_sent_ids, 
139 |         max_len, 
140 |         pad_idx=pad_id, 
141 |         return_pos=False, 
142 |         return_input_mask=False)
143 | 
144 |     if mask_id >= 0:
145 |         return_list = [
146 |             src_id, pos_id, sent_id, self_input_mask, mask_label, mask_pos
147 |         ] + labels_list
148 |     else: 
149 |         return_list = [src_id, pos_id, sent_id, self_input_mask] + labels_list
150 | 
151 |     return return_list if len(return_list) > 1 else return_list[0]
152 | 
153 | 
def pad_batch_data(insts,
                   max_len_in,
                   pad_idx=0,
                   return_pos=False,
                   return_input_mask=False,
                   return_max_len=False,
                   return_num_token=False):
    """
    Right-pad every instance with ``pad_idx`` and optionally build the
    matching position ids and input mask.

    Args:
        insts: list of token-id lists.
        max_len_in: target length; -1 means "longest instance in the batch".
        pad_idx: value used for padding (ids and positions alike).
        return_pos: also return position ids, int64 [batch, max_len].
        return_input_mask: also return a float32 [batch, max_len, 1] mask
            that is 1 on real tokens and 0 on padding.
        return_max_len: also return the padded length.
        return_num_token: also return the total number of real tokens.

    Returns:
        The padded int64 [batch, max_len] array alone, or a list starting
        with it followed by the requested extras in the order above.
    """
    if max_len_in == -1:
        max_len = max(len(inst) for inst in insts)
    else:
        max_len = max_len_in

    # Any token included in dict can be used to pad, since the paddings' loss
    # will be masked out by weights and make no effect on parameter gradients.
    padded_rows = []
    for inst in insts:
        padded_rows.append(inst + [pad_idx] * (max_len - len(inst)))
    outputs = [np.array(padded_rows).astype("int64").reshape([-1, max_len])]

    if return_pos:
        # 0..len-1 for real tokens, pad_idx afterwards
        pos_rows = []
        for inst in insts:
            real_len = len(inst)
            pos_rows.append(list(range(0, real_len)) + [pad_idx] * (max_len - real_len))
        outputs.append(np.array(pos_rows).astype("int64").reshape([-1, max_len]))

    if return_input_mask:
        # This is used to avoid attention on paddings.
        mask_rows = []
        for inst in insts:
            real_len = len(inst)
            mask_rows.append([1] * real_len + [0] * (max_len - real_len))
        mask_array = np.expand_dims(np.array(mask_rows), axis=-1)
        outputs.append(mask_array.astype("float32"))

    if return_max_len:
        outputs.append(max_len)

    if return_num_token:
        outputs.append(sum(len(inst) for inst in insts))

    if len(outputs) > 1:
        return outputs
    return outputs[0]
201 | 
202 | 
# This module is meant to be imported; running it directly does nothing.
if __name__ == "__main__":
    pass
205 | 


--------------------------------------------------------------------------------
/DGU/dgu/scripts/build_swda_dataset.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #     http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | """build swda train dev test dataset"""
 16 | 
 17 | import sys
 18 | import csv
 19 | import os
 20 | import io
 21 | import re
 22 | 
 23 | import commonlib
 24 | 
 25 | 
 26 | class SWDA(object): 
 27 |     """
 28 |     dialogue act dataset swda data process
 29 |     """
 30 |     def __init__(self): 
 31 |         """
 32 |         init instance
 33 |         """
 34 |         self.tag_id = 0
 35 |         self.map_tag_dict = dict()
 36 |         self.out_dir = "../../data/input/data/swda"
 37 |         self.data_list = "./conf/swda.conf"
 38 |         self.map_tag = "../../data/input/data/swda/map_tag_id.txt"
 39 |         self.src_dir = "../../data/input/data/swda/source_data/swda"
 40 |         self._load_file()
 41 | 
 42 |     def _load_file(self): 
 43 |         """
 44 |         load dataset filename
 45 |         """
 46 |         self.data_dict = commonlib.load_dict(self.data_list)
 47 |         self.file_dict = {}
 48 |         child_dir = commonlib.get_dir_list(self.src_dir)
 49 |         for chd in child_dir: 
 50 |             file_list, file_path = commonlib.get_file_list(chd)
 51 |             for i in range(len(file_list)): 
 52 |                 name = file_list[i] 
 53 |                 keyword = "sw%s" % name.split('.')[0].split('_')[-1]
 54 |                 self.file_dict[keyword] = file_path[i]
 55 | 
 56 |     def _parser_dataset(self, data_type): 
 57 |         """
 58 |         parser train dev test dataset
 59 |         """
 60 |         out_filename = "%s/%s.txt" % (self.out_dir, data_type)
 61 |         fw = io.open(out_filename, 'w', encoding='utf8')
 62 |         for name in self.data_dict[data_type]: 
 63 |             file_path = self.file_dict[name]
 64 |             fr = io.open(file_path, 'r', encoding="utf8")
 65 |             idx = 0
 66 |             row = csv.reader(fr, delimiter = ',')
 67 |             for r in row: 
 68 |                 if idx == 0: 
 69 |                     idx += 1
 70 |                     continue
 71 |                 out = self._parser_utterence(r)
 72 |                 fw.write(u"%s\n" % out)
 73 | 
 74 |     def _clean_text(self, text): 
 75 |         """
 76 |         text cleaning for dialogue act dataset
 77 |         """
 78 |         if text.startswith('<') and text.endswith('>.'): 
 79 |             return text
 80 |         if "[" in text or "]" in text:
 81 |             stat = True
 82 |         else: 
 83 |             stat = False
 84 |         group = re.findall("\[.*?\+.*?\]", text)
 85 |         while group and stat: 
 86 |             for elem in group: 
 87 |                 elem_src = elem
 88 |                 elem = re.sub('\+', '', elem.lstrip('[').rstrip(']'))
 89 |                 text = text.replace(elem_src, elem)
 90 |             if "[" in text or "]" in text: 
 91 |                 stat = True
 92 |             else: 
 93 |                 stat = False
 94 |             group = re.findall("\[.*?\+.*?\]", text)
 95 |         if "{" in text or "}" in text: 
 96 |             stat = True
 97 |         else: 
 98 |             stat = False
 99 |         group = re.findall("{[A-Z].*?}", text)
100 |         while group and stat: 
101 |             child_group = re.findall("{[A-Z]*(.*?)}", text)
102 |             for i in range(len(group)):  
103 |                 text = text.replace(group[i], child_group[i])
104 |             if "{" in text or "}" in text: 
105 |                 stat = True
106 |             else: 
107 |                 stat = False
108 |             group = re.findall("{[A-Z].*?}", text)
109 |         if "(" in text or ")" in text: 
110 |             stat = True
111 |         else: 
112 |             stat = False
113 |         group = re.findall("\(\(.*?\)\)", text)
114 |         while group and stat: 
115 |             for elem in group: 
116 |                 if elem: 
117 |                     elem_clean = re.sub("\(|\)", "", elem)
118 |                     text = text.replace(elem, elem_clean)
119 |                 else: 
120 |                     text = text.replace(elem, "mumblex")
121 |             if "(" in text or ")" in text:
122 |                 stat = True
123 |             else: 
124 |                 stat = False
125 |             group = re.findall("\(\((.*?)\)\)", text)
126 | 
127 |         group = re.findall("\<.*?\>", text)
128 |         if group: 
129 |             for elem in group: 
130 |                 text = text.replace(elem, "")
131 | 
132 |         text = re.sub(r" \'s", "\'s", text)
133 |         text = re.sub(r" n\'t", "n\'t", text)
134 |         text = re.sub(r" \'t", "\'t", text)
135 |         text = re.sub(" +", " ", text)
136 |         text = text.rstrip('\/').strip().strip('-')
137 |         text = re.sub("\[|\]|\+|\>|\<|\{|\}", "", text)
138 |         return text.strip().lower()
139 | 
140 |     def _map_tag(self, da_tag): 
141 |         """
142 |         map tag to 42 classes
143 |         """
144 |         curr_da_tags = []
145 |         curr_das = re.split(r"\s*[,;]\s*", da_tag)
146 |         for curr_da in curr_das: 
147 |             if curr_da == "qy_d" or curr_da == "qw^d" or curr_da == "b^m":
148 |                 pass
149 |             elif curr_da == "nn^e":
150 |                 curr_da = "ng"
151 |             elif curr_da == "ny^e":
152 |                 curr_da = "na"
153 |             else: 
154 |                 curr_da = re.sub(r'(.)\^.*', r'\1', curr_da)
155 |                 curr_da = re.sub(r'[\(\)@*]', '', curr_da)
156 |                 tag = curr_da
157 |                 if tag in ('qr', 'qy'): 
158 |                     tag = 'qy'
159 |                 elif tag in ('fe', 'ba'):
160 |                     tag = 'ba'
161 |                 elif tag in ('oo', 'co', 'cc'):
162 |                     tag = 'oo_co_cc'
163 |                 elif tag in ('fx', 'sv'):
164 |                     tag = 'sv'
165 |                 elif tag in ('aap', 'am'):
166 |                     tag = 'aap_am'
167 |                 elif tag in ('arp', 'nd'):
168 |                     tag = 'arp_nd'
169 |                 elif tag in ('fo', 'o', 'fw', '"', 'by', 'bc'):
170 |                     tag = 'fo_o_fw_"_by_bc'
171 |                 curr_da = tag
172 |             curr_da_tags.append(curr_da)
173 |         if curr_da_tags[0] not in self.map_tag_dict: 
174 |             self.map_tag_dict[curr_da_tags[0]] = self.tag_id
175 |             self.tag_id += 1
176 |         return self.map_tag_dict[curr_da_tags[0]]
177 |     
178 |     def _parser_utterence(self, line): 
179 |         """
180 |         parser one turn dialogue
181 |         """
182 |         conversation_no = line[2]
183 |         act_tag = line[4]
184 |         caller = line[5]
185 |         text = line[8]
186 |         text = self._clean_text(text)
187 |         act_tag = self._map_tag(act_tag)
188 | 
189 |         out = "%s\t%s\t%s\t%s" % (conversation_no, act_tag, caller, text)
190 |         return out
191 |         
192 |     def get_train_dataset(self): 
193 |         """
194 |         parser train dataset and print train.txt
195 |         """
196 |         self._parser_dataset("train")
197 | 
198 |     def get_dev_dataset(self): 
199 |         """
200 |         parser dev dataset and print dev.txt
201 |         """
202 |         self._parser_dataset("dev")
203 | 
204 |     def get_test_dataset(self): 
205 |         """
206 |         parser test dataset and print test.txt
207 |         """
208 |         self._parser_dataset("test")
209 | 
210 |     def get_labels(self): 
211 |         """
212 |         get tag and map ids file
213 |         """
214 |         fw = io.open(self.map_tag, 'w', encoding='utf8')
215 |         for elem in self.map_tag_dict: 
216 |             fw.write(u"%s\t%s\n" % (elem, self.map_tag_dict[elem]))
217 | 
218 |     def main(self): 
219 |         """
220 |         run data process
221 |         """
222 |         self.get_train_dataset()
223 |         self.get_dev_dataset()
224 |         self.get_test_dataset()
225 |         self.get_labels()
226 | 
# Script entry point: constructing SWDA runs _load_file via __init__, and
# main() then writes the train/dev/test files and the tag-id mapping.
if __name__ == "__main__": 
    swda_inst = SWDA()
    swda_inst.main()
230 | 
231 | 
232 | 
233 | 
234 | 


--------------------------------------------------------------------------------
/DAM/models/cross_match_net.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import numpy as np
  3 | import cPickle as pickle
  4 | 
  5 | import utils.layers as layers
  6 | import utils.operations as op
  7 | 
  8 | class Net(object):
  9 |     '''Add positional encoding(initializer lambda is 0),
 10 |        cross-attention, cnn integrated and grad clip by value.
 11 | 
 12 |     Attributes:
 13 |         conf: a configuration paramaters dict
 14 |         word_embedding_init: a 2-d array with shape [vocab_size+1, emb_size]
 15 |     '''
 16 |     def __init__(self, conf):
 17 |         self._graph = tf.Graph()
 18 |         self._conf = conf
 19 | 
 20 |         if self._conf['word_emb_init'] is not None:
 21 |             print('loading word emb init')
 22 |             self._word_embedding_init = pickle.load(open(self._conf['word_emb_init'], 'rb'))
 23 |         else:
 24 |             self._word_embedding_init = None
 25 | 
 26 |     def build_graph(self):
 27 |         with self._graph.as_default():
 28 |             rand_seed = self._conf['rand_seed']
 29 |             tf.set_random_seed(rand_seed)
 30 | 
 31 |             #word embedding
 32 |             if self._word_embedding_init is not None:
 33 |                 word_embedding_initializer = tf.constant_initializer(self._word_embedding_init)
 34 |             else:
 35 |                 word_embedding_initializer = tf.random_normal_initializer(stddev=0.1)
 36 | 
 37 |             self._word_embedding = tf.get_variable(
 38 |                 name='word_embedding',
 39 |                 shape=[self._conf['vocab_size']+1, self._conf['emb_size']],
 40 |                 dtype=tf.float32,
 41 |                 initializer=word_embedding_initializer)
 42 | 
 43 | 
 44 |             #define placehloders
 45 |             self.turns = tf.placeholder(
 46 |                 tf.int32,
 47 |                 shape=[self._conf["batch_size"], self._conf["max_turn_num"], self._conf["max_turn_len"]])
 48 | 
 49 |             self.tt_turns_len = tf.placeholder(
 50 |                 tf.int32,
 51 |                 shape=[self._conf["batch_size"]])
 52 | 
 53 |             self.every_turn_len = tf.placeholder(
 54 |                 tf.int32,
 55 |                 shape=[self._conf["batch_size"], self._conf["max_turn_num"]])
 56 |     
 57 |             self.response = tf.placeholder(
 58 |                 tf.int32, 
 59 |                 shape=[self._conf["batch_size"], self._conf["max_turn_len"]])
 60 | 
 61 |             self.response_len = tf.placeholder(
 62 |                 tf.int32, 
 63 |                 shape=[self._conf["batch_size"]])
 64 | 
 65 |             self.label = tf.placeholder(
 66 |                 tf.float32, 
 67 |                 shape=[self._conf["batch_size"]])
 68 | 
 69 | 
 70 |             #define operations
 71 |             #response part
 72 |             Hr = tf.nn.embedding_lookup(self._word_embedding, self.response)
 73 | 
 74 |             if self._conf['is_positional'] and self._conf['stack_num'] > 0:
 75 |                 with tf.variable_scope('positional'):
 76 |                     Hr = op.positional_encoding_vector(Hr, max_timescale=10)
 77 |             Hr_stack = [Hr]
 78 | 
 79 |             for index in range(self._conf['stack_num']):
 80 |                 with tf.variable_scope('self_stack_' + str(index)):
 81 |                     Hr = layers.block(
 82 |                         Hr, Hr, Hr, 
 83 |                         Q_lengths=self.response_len, K_lengths=self.response_len)
 84 |                     Hr_stack.append(Hr)
 85 | 
 86 | 
 87 |             #context part
 88 |             #a list of length max_turn_num, every element is a tensor with shape [batch, max_turn_len]
 89 |             list_turn_t = tf.unstack(self.turns, axis=1) 
 90 |             list_turn_length = tf.unstack(self.every_turn_len, axis=1)
 91 |             
 92 |             sim_turns = []
 93 |             #for every turn_t calculate matching vector
 94 |             for turn_t, t_turn_length in zip(list_turn_t, list_turn_length):
 95 |                 Hu = tf.nn.embedding_lookup(self._word_embedding, turn_t) #[batch, max_turn_len, emb_size]
 96 | 
 97 |                 if self._conf['is_positional'] and self._conf['stack_num'] > 0:
 98 |                     with tf.variable_scope('positional', reuse=True):
 99 |                         Hu = op.positional_encoding_vector(Hu, max_timescale=10)
100 |                 Hu_stack = [Hu]
101 | 
102 |                 for index in range(self._conf['stack_num']):
103 | 
104 |                     with tf.variable_scope('self_stack_' + str(index), reuse=True):
105 |                         Hu = layers.block(
106 |                             Hu, Hu, Hu,
107 |                             Q_lengths=t_turn_length, K_lengths=t_turn_length)
108 | 
109 |                         Hu_stack.append(Hu)
110 | 
111 | 
112 | 
113 |                 r_a_t_stack = []
114 |                 t_a_r_stack = []
115 |                 for index in range(self._conf['stack_num']+1):
116 | 
117 |                     with tf.variable_scope('t_attend_r_' + str(index)):
118 |                         try:
119 |                             t_a_r = layers.block(
120 |                                 Hu_stack[index], Hr_stack[index], Hr_stack[index],
121 |                                 Q_lengths=t_turn_length, K_lengths=self.response_len)
122 |                         except ValueError:
123 |                             tf.get_variable_scope().reuse_variables()
124 |                             t_a_r = layers.block(
125 |                                 Hu_stack[index], Hr_stack[index], Hr_stack[index],
126 |                                 Q_lengths=t_turn_length, K_lengths=self.response_len)
127 | 
128 | 
129 |                     with tf.variable_scope('r_attend_t_' + str(index)):
130 |                         try:
131 |                             r_a_t = layers.block(
132 |                                 Hr_stack[index], Hu_stack[index], Hu_stack[index],
133 |                                 Q_lengths=self.response_len, K_lengths=t_turn_length)
134 |                         except ValueError:
135 |                             tf.get_variable_scope().reuse_variables()
136 |                             r_a_t = layers.block(
137 |                                 Hr_stack[index], Hu_stack[index], Hu_stack[index],
138 |                                 Q_lengths=self.response_len, K_lengths=t_turn_length)
139 | 
140 |                     t_a_r_stack.append(t_a_r)
141 |                     r_a_t_stack.append(r_a_t)
142 | 
143 |                 
144 |                 t_a_r = tf.stack(t_a_r_stack, axis=-1)
145 |                 r_a_t = tf.stack(r_a_t_stack, axis=-1)
146 | 
147 |                             
148 |                 #calculate similarity matrix
149 |                 with tf.variable_scope('similarity'):
150 |                     # sim shape [batch, max_turn_len, max_turn_len, 2*stack_num+1]
151 |                     # divide sqrt(200) to prevent gradient explosion
152 |                     sim = tf.einsum('biks,bjks->bijs', t_a_r, r_a_t) / tf.sqrt(200.0)
153 | 
154 |                 sim_turns.append(sim)
155 | 
156 | 
157 |             #cnn and aggregation
158 |             sim = tf.stack(sim_turns, axis=1)
159 |             print('sim shape: %s' %sim.shape)
160 |             with tf.variable_scope('cnn_aggregation'):
161 |                 final_info = layers.CNN_3d(sim, 32, 16)
162 |                 #for douban
163 |                 #final_info = layers.CNN_3d(sim, 16, 16)
164 | 
165 | 
166 |             #loss and train
167 |             with tf.variable_scope('loss'):
168 |                 self.loss, self.logits = layers.loss(final_info, self.label)
169 | 
170 |                 self.global_step = tf.Variable(0, trainable=False)
171 |                 initial_learning_rate = self._conf['learning_rate']
172 |                 self.learning_rate = tf.train.exponential_decay(
173 |                     initial_learning_rate,
174 |                     global_step=self.global_step,
175 |                     decay_steps=400,
176 |                     decay_rate=0.9,
177 |                     staircase=True)
178 | 
179 |                 Optimizer = tf.train.AdamOptimizer(self.learning_rate)
180 |                 self.optimizer = Optimizer.minimize(
181 |                     self.loss,
182 |                     global_step=self.global_step)
183 | 
184 |                 self.init = tf.global_variables_initializer()
185 |                 self.saver = tf.train.Saver(max_to_keep = self._conf["max_to_keep"])
186 |                 self.all_variables = tf.global_variables() 
187 |                 self.all_operations = self._graph.get_operations()
188 |                 self.grads_and_vars = Optimizer.compute_gradients(self.loss)
189 | 
190 |                 for grad, var in self.grads_and_vars:
191 |                     if grad is None:
192 |                         print var
193 | 
194 |                 self.capped_gvs = [(tf.clip_by_value(grad, -1, 1), var) for grad, var in self.grads_and_vars]
195 |                 self.g_updates = Optimizer.apply_gradients(
196 |                     self.capped_gvs,
197 |                     global_step=self.global_step)
198 |     
199 |         return self._graph
200 | 
201 | 


--------------------------------------------------------------------------------
/DAM/models/net.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import numpy as np
  3 | import cPickle as pickle
  4 | 
  5 | import utils.layers as layers
  6 | import utils.operations as op
  7 | 
  8 | class Net(object):
  9 |     '''Add positional encoding(initializer lambda is 0),
 10 |        cross-attention, cnn integrated and grad clip by value.
 11 | 
 12 |     Attributes:
 13 |         conf: a configuration paramaters dict
 14 |         word_embedding_init: a 2-d array with shape [vocab_size+1, emb_size]
 15 |     '''
 16 |     def __init__(self, conf):
 17 |         self._graph = tf.Graph()
 18 |         self._conf = conf
 19 | 
 20 |         if self._conf['word_emb_init'] is not None:
 21 |             print('loading word emb init')
 22 |             self._word_embedding_init = pickle.load(open(self._conf['word_emb_init'], 'rb'))
 23 |         else:
 24 |             self._word_embedding_init = None
 25 | 
 26 |     def build_graph(self):
 27 |         with self._graph.as_default():
 28 |             if self._conf['rand_seed'] is not None:
 29 |                 rand_seed = self._conf['rand_seed']
 30 |                 tf.set_random_seed(rand_seed)
 31 |                 print('set tf random seed: %s' %self._conf['rand_seed'])
 32 | 
 33 |             #word embedding
 34 |             if self._word_embedding_init is not None:
 35 |                 word_embedding_initializer = tf.constant_initializer(self._word_embedding_init)
 36 |             else:
 37 |                 word_embedding_initializer = tf.random_normal_initializer(stddev=0.1)
 38 | 
 39 |             self._word_embedding = tf.get_variable(
 40 |                 name='word_embedding',
 41 |                 shape=[self._conf['vocab_size']+1, self._conf['emb_size']],
 42 |                 dtype=tf.float32,
 43 |                 initializer=word_embedding_initializer)
 44 | 
 45 | 
 46 |             #define placehloders
 47 |             self.turns = tf.placeholder(
 48 |                 tf.int32,
 49 |                 shape=[self._conf["batch_size"], self._conf["max_turn_num"], self._conf["max_turn_len"]])
 50 | 
 51 |             self.tt_turns_len = tf.placeholder(
 52 |                 tf.int32,
 53 |                 shape=[self._conf["batch_size"]])
 54 | 
 55 |             self.every_turn_len = tf.placeholder(
 56 |                 tf.int32,
 57 |                 shape=[self._conf["batch_size"], self._conf["max_turn_num"]])
 58 |     
 59 |             self.response = tf.placeholder(
 60 |                 tf.int32, 
 61 |                 shape=[self._conf["batch_size"], self._conf["max_turn_len"]])
 62 | 
 63 |             self.response_len = tf.placeholder(
 64 |                 tf.int32, 
 65 |                 shape=[self._conf["batch_size"]])
 66 | 
 67 |             self.label = tf.placeholder(
 68 |                 tf.float32, 
 69 |                 shape=[self._conf["batch_size"]])
 70 | 
 71 | 
 72 |             #define operations
 73 |             #response part
 74 |             Hr = tf.nn.embedding_lookup(self._word_embedding, self.response)
 75 | 
 76 |             if self._conf['is_positional'] and self._conf['stack_num'] > 0:
 77 |                 with tf.variable_scope('positional'):
 78 |                     Hr = op.positional_encoding_vector(Hr, max_timescale=10)
 79 |             Hr_stack = [Hr]
 80 | 
 81 |             for index in range(self._conf['stack_num']):
 82 |                 with tf.variable_scope('self_stack_' + str(index)):
 83 |                     Hr = layers.block(
 84 |                         Hr, Hr, Hr, 
 85 |                         Q_lengths=self.response_len, K_lengths=self.response_len)
 86 |                     Hr_stack.append(Hr)
 87 | 
 88 | 
 89 |             #context part
 90 |             #a list of length max_turn_num, every element is a tensor with shape [batch, max_turn_len]
 91 |             list_turn_t = tf.unstack(self.turns, axis=1) 
 92 |             list_turn_length = tf.unstack(self.every_turn_len, axis=1)
 93 |             
 94 |             sim_turns = []
 95 |             #for every turn_t calculate matching vector
 96 |             for turn_t, t_turn_length in zip(list_turn_t, list_turn_length):
 97 |                 Hu = tf.nn.embedding_lookup(self._word_embedding, turn_t) #[batch, max_turn_len, emb_size]
 98 | 
 99 |                 if self._conf['is_positional'] and self._conf['stack_num'] > 0:
100 |                     with tf.variable_scope('positional', reuse=True):
101 |                         Hu = op.positional_encoding_vector(Hu, max_timescale=10)
102 |                 Hu_stack = [Hu]
103 | 
104 |                 for index in range(self._conf['stack_num']):
105 | 
106 |                     with tf.variable_scope('self_stack_' + str(index), reuse=True):
107 |                         Hu = layers.block(
108 |                             Hu, Hu, Hu,
109 |                             Q_lengths=t_turn_length, K_lengths=t_turn_length)
110 | 
111 |                         Hu_stack.append(Hu)
112 | 
113 | 
114 | 
115 |                 r_a_t_stack = []
116 |                 t_a_r_stack = []
117 |                 for index in range(self._conf['stack_num']+1):
118 | 
119 |                     with tf.variable_scope('t_attend_r_' + str(index)):
120 |                         try:
121 |                             t_a_r = layers.block(
122 |                                 Hu_stack[index], Hr_stack[index], Hr_stack[index],
123 |                                 Q_lengths=t_turn_length, K_lengths=self.response_len)
124 |                         except ValueError:
125 |                             tf.get_variable_scope().reuse_variables()
126 |                             t_a_r = layers.block(
127 |                                 Hu_stack[index], Hr_stack[index], Hr_stack[index],
128 |                                 Q_lengths=t_turn_length, K_lengths=self.response_len)
129 | 
130 | 
131 |                     with tf.variable_scope('r_attend_t_' + str(index)):
132 |                         try:
133 |                             r_a_t = layers.block(
134 |                                 Hr_stack[index], Hu_stack[index], Hu_stack[index],
135 |                                 Q_lengths=self.response_len, K_lengths=t_turn_length)
136 |                         except ValueError:
137 |                             tf.get_variable_scope().reuse_variables()
138 |                             r_a_t = layers.block(
139 |                                 Hr_stack[index], Hu_stack[index], Hu_stack[index],
140 |                                 Q_lengths=self.response_len, K_lengths=t_turn_length)
141 | 
142 |                     t_a_r_stack.append(t_a_r)
143 |                     r_a_t_stack.append(r_a_t)
144 | 
145 |                 t_a_r_stack.extend(Hu_stack)
146 |                 r_a_t_stack.extend(Hr_stack)
147 |                 
148 |                 t_a_r = tf.stack(t_a_r_stack, axis=-1)
149 |                 r_a_t = tf.stack(r_a_t_stack, axis=-1)
150 | 
151 |                             
152 |                 #calculate similarity matrix
153 |                 with tf.variable_scope('similarity'):
154 |                     # sim shape [batch, max_turn_len, max_turn_len, 2*stack_num+1]
155 |                     # divide sqrt(200) to prevent gradient explosion
156 |                     sim = tf.einsum('biks,bjks->bijs', t_a_r, r_a_t) / tf.sqrt(200.0)
157 | 
158 |                 sim_turns.append(sim)
159 | 
160 | 
161 |             #cnn and aggregation
162 |             sim = tf.stack(sim_turns, axis=1)
163 |             print('sim shape: %s' %sim.shape)
164 |             with tf.variable_scope('cnn_aggregation'):
165 |                 final_info = layers.CNN_3d(sim, 32, 16)
166 |                 #for douban
167 |                 #final_info = layers.CNN_3d(sim, 16, 16)
168 | 
169 |             #loss and train
170 |             with tf.variable_scope('loss'):
171 |                 self.loss, self.logits = layers.loss(final_info, self.label)
172 | 
173 |                 self.global_step = tf.Variable(0, trainable=False)
174 |                 initial_learning_rate = self._conf['learning_rate']
175 |                 self.learning_rate = tf.train.exponential_decay(
176 |                     initial_learning_rate,
177 |                     global_step=self.global_step,
178 |                     decay_steps=400,
179 |                     decay_rate=0.9,
180 |                     staircase=True)
181 | 
182 |                 Optimizer = tf.train.AdamOptimizer(self.learning_rate)
183 |                 self.optimizer = Optimizer.minimize(
184 |                     self.loss,
185 |                     global_step=self.global_step)
186 | 
187 |                 self.init = tf.global_variables_initializer()
188 |                 self.saver = tf.train.Saver(max_to_keep = self._conf["max_to_keep"])
189 |                 self.all_variables = tf.global_variables() 
190 |                 self.all_operations = self._graph.get_operations()
191 |                 self.grads_and_vars = Optimizer.compute_gradients(self.loss)
192 | 
193 |                 for grad, var in self.grads_and_vars:
194 |                     if grad is None:
195 |                         print var
196 | 
197 |                 self.capped_gvs = [(tf.clip_by_value(grad, -1, 1), var) for grad, var in self.grads_and_vars]
198 |                 self.g_updates = Optimizer.apply_gradients(
199 |                     self.capped_gvs,
200 |                     global_step=self.global_step)
201 |     
202 |         return self._graph
203 | 
204 | 


--------------------------------------------------------------------------------
/DGU/dgu/bert.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #     http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | """BERT model."""
 16 | 
 17 | from __future__ import absolute_import
 18 | from __future__ import division
 19 | from __future__ import print_function
 20 | 
 21 | import os
 22 | import io
 23 | import sys
 24 | import six
 25 | import json
 26 | import numpy as np
 27 | import paddle.fluid as fluid
 28 | 
 29 | from dgu.transformer_encoder import encoder, pre_process_layer
 30 | 
 31 | 
 32 | class BertConfig(object):
 33 |     def __init__(self, config_path):
 34 |         self._config_dict = self._parse(config_path)
 35 | 
 36 |     def _parse(self, config_path):
 37 |         try:
 38 |             json_file = io.open(config_path, 'r', encoding="utf8")
 39 |             config_dict = json.load(json_file)
 40 |         except Exception:
 41 |             raise IOError("Error in parsing bert model config file '%s'" %
 42 |                           config_path)
 43 |         else:
 44 |             return config_dict
 45 | 
 46 |     def __getitem__(self, key):
 47 |         return self._config_dict[key]
 48 | 
 49 |     def print_config(self):
 50 |         for arg, value in sorted(six.iteritems(self._config_dict)):
 51 |             print('%s: %s' % (arg, value))
 52 |         print('------------------------------------------------')
 53 | 
 54 | 
 55 | class BertModel(object):
 56 |     def __init__(self,
 57 |                  src_ids,
 58 |                  position_ids,
 59 |                  sentence_ids,
 60 |                  input_mask,
 61 |                  config,
 62 |                  weight_sharing=True,
 63 |                  use_fp16=False):
 64 | 
 65 |         self._emb_size = config['hidden_size']
 66 |         self._n_layer = config['num_hidden_layers']
 67 |         self._n_head = config['num_attention_heads']
 68 |         self._voc_size = config['vocab_size']
 69 |         self._max_position_seq_len = config['max_position_embeddings']
 70 |         self._sent_types = config['type_vocab_size']
 71 |         self._hidden_act = config['hidden_act']
 72 |         self._prepostprocess_dropout = config['hidden_dropout_prob']
 73 |         self._attention_dropout = config['attention_probs_dropout_prob']
 74 |         self._weight_sharing = weight_sharing
 75 | 
 76 |         self._word_emb_name = "word_embedding"
 77 |         self._pos_emb_name = "pos_embedding"
 78 |         self._sent_emb_name = "sent_embedding"
 79 |         self._dtype = "float16" if use_fp16 else "float32"
 80 | 
 81 |         # Initialize all weigths by truncated normal initializer, and all biases 
 82 |         # will be initialized by constant zero by default.
 83 |         self._param_initializer = fluid.initializer.TruncatedNormal(
 84 |             scale=config['initializer_range'])
 85 | 
 86 |         self._build_model(src_ids, position_ids, sentence_ids, input_mask)
 87 | 
 88 |     def _build_model(self, src_ids, position_ids, sentence_ids, input_mask):
 89 |         # padding id in vocabulary must be set to 0
 90 |         emb_out = fluid.input.embedding(
 91 |             input=src_ids,
 92 |             size=[self._voc_size, self._emb_size],
 93 |             dtype=self._dtype,
 94 |             param_attr=fluid.ParamAttr(
 95 |                 name=self._word_emb_name, initializer=self._param_initializer),
 96 |             is_sparse=False)
 97 |         position_emb_out = fluid.input.embedding(
 98 |             input=position_ids,
 99 |             size=[self._max_position_seq_len, self._emb_size],
100 |             dtype=self._dtype,
101 |             param_attr=fluid.ParamAttr(
102 |                 name=self._pos_emb_name, initializer=self._param_initializer))
103 | 
104 |         sent_emb_out = fluid.input.embedding(
105 |             sentence_ids,
106 |             size=[self._sent_types, self._emb_size],
107 |             dtype=self._dtype,
108 |             param_attr=fluid.ParamAttr(
109 |                 name=self._sent_emb_name, initializer=self._param_initializer))
110 | 
111 |         emb_out = emb_out + position_emb_out
112 |         emb_out = emb_out + sent_emb_out
113 | 
114 |         emb_out = pre_process_layer(
115 |             emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder')
116 | 
117 |         if self._dtype == "float16":
118 |             input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype)
119 | 
120 |         self_attn_mask = fluid.layers.matmul(
121 |             x=input_mask, y=input_mask, transpose_y=True)
122 |         self_attn_mask = fluid.layers.scale(
123 |             x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False)
124 |         n_head_self_attn_mask = fluid.layers.stack(
125 |             x=[self_attn_mask] * self._n_head, axis=1)
126 |         n_head_self_attn_mask.stop_gradient = True
127 | 
128 |         self._enc_out = encoder(
129 |             enc_input=emb_out,
130 |             attn_bias=n_head_self_attn_mask,
131 |             n_layer=self._n_layer,
132 |             n_head=self._n_head,
133 |             d_key=self._emb_size // self._n_head,
134 |             d_value=self._emb_size // self._n_head,
135 |             d_model=self._emb_size,
136 |             d_inner_hid=self._emb_size * 4,
137 |             prepostprocess_dropout=self._prepostprocess_dropout,
138 |             attention_dropout=self._attention_dropout,
139 |             relu_dropout=0,
140 |             hidden_act=self._hidden_act,
141 |             preprocess_cmd="",
142 |             postprocess_cmd="dan",
143 |             param_initializer=self._param_initializer,
144 |             name='encoder')
145 | 
146 |     def get_sequence_output(self):
147 |         return self._enc_out
148 | 
149 |     def get_pooled_output(self):
150 |         """Get the first feature of each sequence for classification"""
151 | 
152 |         next_sent_feat = fluid.layers.slice(
153 |             input=self._enc_out, axes=[1], starts=[0], ends=[1])
154 |         next_sent_feat = fluid.layers.fc(
155 |             input=next_sent_feat,
156 |             size=self._emb_size,
157 |             act="tanh",
158 |             param_attr=fluid.ParamAttr(
159 |                 name="pooled_fc.w_0", initializer=self._param_initializer),
160 |             bias_attr="pooled_fc.b_0")
161 |         return next_sent_feat
162 | 
163 |     def get_pretraining_output(self, mask_label, mask_pos, labels):
164 |         """Get the loss & accuracy for pretraining"""
165 | 
166 |         mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32')
167 | 
168 |         # extract the first token feature in each sentence
169 |         next_sent_feat = self.get_pooled_output()
170 |         reshaped_emb_out = fluid.layers.reshape(
171 |             x=self._enc_out, shape=[-1, self._emb_size])
172 |         # extract masked tokens' feature
173 |         mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos)
174 | 
175 |         # transform: fc
176 |         mask_trans_feat = fluid.layers.fc(
177 |             input=mask_feat,
178 |             size=self._emb_size,
179 |             act=self._hidden_act,
180 |             param_attr=fluid.ParamAttr(
181 |                 name='mask_lm_trans_fc.w_0',
182 |                 initializer=self._param_initializer),
183 |             bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0'))
184 |         # transform: layer norm 
185 |         mask_trans_feat = pre_process_layer(
186 |             mask_trans_feat, 'n', name='mask_lm_trans')
187 | 
188 |         mask_lm_out_bias_attr = fluid.ParamAttr(
189 |             name="mask_lm_out_fc.b_0",
190 |             initializer=fluid.initializer.Constant(value=0.0))
191 |         if self._weight_sharing:
192 |             fc_out = fluid.layers.matmul(
193 |                 x=mask_trans_feat,
194 |                 y=fluid.default_main_program().global_block().var(
195 |                     self._word_emb_name),
196 |                 transpose_y=True)
197 |             fc_out += fluid.layers.create_parameter(
198 |                 shape=[self._voc_size],
199 |                 dtype=self._dtype,
200 |                 attr=mask_lm_out_bias_attr,
201 |                 is_bias=True)
202 | 
203 |         else:
204 |             fc_out = fluid.layers.fc(input=mask_trans_feat,
205 |                                      size=self._voc_size,
206 |                                      param_attr=fluid.ParamAttr(
207 |                                          name="mask_lm_out_fc.w_0",
208 |                                          initializer=self._param_initializer),
209 |                                      bias_attr=mask_lm_out_bias_attr)
210 | 
211 |         mask_lm_loss = fluid.layers.softmax_with_cross_entropy(
212 |             logits=fc_out, label=mask_label)
213 |         mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss)
214 | 
215 |         next_sent_fc_out = fluid.layers.fc(
216 |             input=next_sent_feat,
217 |             size=2,
218 |             param_attr=fluid.ParamAttr(
219 |                 name="next_sent_fc.w_0", initializer=self._param_initializer),
220 |             bias_attr="next_sent_fc.b_0")
221 | 
222 |         next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy(
223 |             logits=next_sent_fc_out, label=labels, return_softmax=True)
224 | 
225 |         next_sent_acc = fluid.layers.accuracy(
226 |             input=next_sent_softmax, label=labels)
227 | 
228 |         mean_next_sent_loss = fluid.layers.mean(next_sent_loss)
229 | 
230 |         loss = mean_next_sent_loss + mean_mask_lm_loss
231 |         return next_sent_acc, mean_mask_lm_loss, loss
232 | 


--------------------------------------------------------------------------------