├── README.md
├── np_to_tf.py
└── pad_to_np.py


/README.md:
--------------------------------------------------------------------------------
 1 | # tensorflow_ernie
 2 | 将百度ernie的paddlepaddle模型转成tensorflow模型
 3 | 
 4 | 
 5 | 
 6 | 2019年10月12日 更新：<br/>
在pad_to_np.py中调用create_model时增加参数is_classify=True，以适应新版本ernie代码
 8 | 
 9 | 
10 | 
11 | 
12 | 
13 | 
14 | 2019年9月25日 更新：
15 | 
16 | 上传转换代码
17 | 步骤:
18 | 1. 将两个py文件放到ernie的根目录
19 | 2. 更改pad_to_np里的模型路径，然后python pad_to_np.py
20 | 3. python np_to_tf.py
21 | 
22 | 
23 | --------------------------------------------------
24 | 2019年5月21日 更新：
25 | 链接:https://pan.baidu.com/s/1I7kKVlZN6hl-sUbnvttJzA 
26 | 提取码:iq74 
27 | 


--------------------------------------------------------------------------------
/np_to_tf.py:
--------------------------------------------------------------------------------
  1 | #coding:utf-8
  2 | ###################################################
# File Name: np_to_tf.py
  4 | # Author: Meng Zhao
  5 | # mail: @
  6 | # Created Time: 2019年04月01日 星期一 16时05分26秒
  7 | #=============================================================
  8 | #   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
  9 | #
 10 | # Licensed under the Apache License, Version 2.0 (the "License");
 11 | # you may not use this file except in compliance with the License.
 12 | # You may obtain a copy of the License at
 13 | #
 14 | #     http://www.apache.org/licenses/LICENSE-2.0
 15 | #
 16 | # Unless required by applicable law or agreed to in writing, software
 17 | # distributed under the License is distributed on an "AS IS" BASIS,
 18 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 19 | # See the License for the specific language governing permissions and
 20 | # limitations under the License.
"""Convert ERNIE parameters dumped as a NumPy dict into a TensorFlow BERT-style checkpoint."""
 22 | 
 23 | from __future__ import absolute_import
 24 | from __future__ import division
 25 | from __future__ import print_function
 26 | 
 27 | import os
 28 | import numpy as np
 29 | import joblib
 30 | import tensorflow as tf
 31 | 
 32 | os.environ["CUDA_VISIBLE_DEVICES"] = ""
 33 | 
 34 | def define_variable(params, fluid_name='', tf_name=''):
 35 |     variable = tf.Variable(tf.convert_to_tensor(params[fluid_name]), name=tf_name)
 36 | 
 37 | def np_to_tensor(params):
 38 |     tf_prefix = 'bert'
 39 | 
 40 |     #embeddings
 41 |     tf_embedding_prefix = tf_prefix + '/embeddings'
 42 |     define_variable(params, 'pre_encoder_layer_norm_scale', tf_embedding_prefix + '/LayerNorm/gamma')
 43 |     define_variable(params, 'pre_encoder_layer_norm_bias',  tf_embedding_prefix + '/LayerNorm/beta')
 44 |     define_variable(params, 'pos_embedding',                tf_embedding_prefix + '/position_embeddings')
 45 |     define_variable(params, 'word_embedding',               tf_embedding_prefix + '/word_embeddings')
 46 |     define_variable(params, 'sent_embedding',               tf_embedding_prefix + '/token_type_embeddings')
 47 | 
 48 |     #layers
 49 |     tf_encoder_prefix = tf_prefix + '/encoder/layer_'
 50 |     for i in range(12):
 51 |         fluid_prefix = "encoder_layer_" + str(i)
 52 |         define_variable(params, fluid_prefix + '_post_att_layer_norm_scale',     tf_encoder_prefix + str(i) + '/attention/output/LayerNorm/gamma')
 53 |         define_variable(params, fluid_prefix + '_post_att_layer_norm_bias',      tf_encoder_prefix + str(i) + '/attention/output/LayerNorm/beta')
 54 |         define_variable(params, fluid_prefix + '_multi_head_att_output_fc.w_0',  tf_encoder_prefix + str(i) + '/attention/output/dense/kernel')
 55 |         define_variable(params, fluid_prefix + '_multi_head_att_output_fc.b_0',  tf_encoder_prefix + str(i) + '/attention/output/dense/bias')
 56 |         define_variable(params, fluid_prefix + '_multi_head_att_key_fc.w_0',     tf_encoder_prefix + str(i) + '/attention/self/key/kernel')
 57 |         define_variable(params, fluid_prefix + '_multi_head_att_key_fc.b_0',     tf_encoder_prefix + str(i) + '/attention/self/key/bias')
 58 |         define_variable(params, fluid_prefix + '_multi_head_att_query_fc.w_0',   tf_encoder_prefix + str(i) + '/attention/self/query/kernel')
 59 |         define_variable(params, fluid_prefix + '_multi_head_att_query_fc.b_0',   tf_encoder_prefix + str(i) + '/attention/self/query/bias')
 60 |         define_variable(params, fluid_prefix + '_multi_head_att_value_fc.w_0',   tf_encoder_prefix + str(i) + '/attention/self/value/kernel')
 61 |         define_variable(params, fluid_prefix + '_multi_head_att_value_fc.b_0',   tf_encoder_prefix + str(i) + '/attention/self/value/bias')
 62 |         define_variable(params, fluid_prefix + '_ffn_fc_0.w_0',                  tf_encoder_prefix + str(i) + '/intermediate/dense/kernel')
 63 |         define_variable(params, fluid_prefix + '_ffn_fc_0.b_0',                  tf_encoder_prefix + str(i) + '/intermediate/dense/bias')
 64 |         define_variable(params, fluid_prefix + '_post_ffn_layer_norm_scale',     tf_encoder_prefix + str(i) + '/output/LayerNorm/gamma')
 65 |         define_variable(params, fluid_prefix + '_post_ffn_layer_norm_bias',      tf_encoder_prefix + str(i) + '/output/LayerNorm/beta')
 66 |         define_variable(params, fluid_prefix + '_ffn_fc_1.w_0',                  tf_encoder_prefix + str(i) + '/output/dense/kernel')
 67 |         define_variable(params, fluid_prefix + '_ffn_fc_1.b_0',                  tf_encoder_prefix + str(i) + '/output/dense/bias')
 68 |     
 69 |     #pooler
 70 |     tf_pooler_prefix = tf_prefix + '/pooler'
 71 |     define_variable(params, 'pooled_fc.w_0',  tf_pooler_prefix + '/dense/kernel')
 72 |     define_variable(params, 'pooled_fc.b_0',  tf_pooler_prefix + '/dense/bias')
 73 | 
 74 | 
 75 |     #cls
 76 |     #define_variable(params, 'mask_lm_out_fc.b_0',               'cls/predictions/output_bias')
 77 |     #define_variable(params, 'mask_lm_trans_layer_norm_scale',   'cls/predictions/transform/LayerNorm/gamma')
 78 |     #define_variable(params, 'mask_lm_trans_layer_norm_bias',    'cls/predictions/transform/LayerNorm/beta')
 79 |     #define_variable(params, 'mask_lm_trans_fc.w_0',             'cls/predictions/transform/dense/kernel')
 80 |     #define_variable(params, 'mask_lm_trans_fc.b_0',             'cls/predictions/transform/dense/bias')
 81 |     #define_variable(params, 'next_sent_fc.w_0',                 'cls/seq_relationship/output_weights')
 82 |     #define_variable(params, 'next_sent_fc.b_0',                 'cls/seq_relationship/output_bias')
 83 |     #define_variable(params, 'cls_squad_out_w',                  'cls/squad/output_weights')
 84 |     #define_variable(params, 'cls_squad_out_b',                  'cls/squad/output_bias')
 85 | 
 86 | 
 87 | 
 88 | 
 89 | def covert(input_file):
 90 |     params = joblib.load(input_file)
 91 | 
 92 | 
 93 |     graph = tf.Graph()
 94 |     with graph.as_default():
 95 |         #gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.4)
 96 |         #sess_config = tf.ConfigProto(gpu_options=gpu_options)
 97 |         #sess = tf.Session(sess_config)
 98 |         sess = tf.Session()
 99 |         np_to_tensor(params)
100 | 
101 | 
102 |         saver = tf.train.Saver()
103 |         sess.run(tf.global_variables_initializer())
104 |         with sess.as_default():
105 |             checkpoint_dir = 'checkpoints'
106 |             checkpoint_prefix = os.path.join(checkpoint_dir, 'bert_model.ckpt')
107 |             if not os.path.exists(checkpoint_dir):
108 |                 os.makedirs(checkpoint_dir)
109 |             saver.save(sess, checkpoint_prefix)
110 | 
111 | 
112 | 
if __name__ == '__main__':
    # Script entry point: convert the dump stored in the working directory.
    covert('params.dict')
116 | 


--------------------------------------------------------------------------------
/pad_to_np.py:
--------------------------------------------------------------------------------
 1 | #coding:utf-8
 2 | ###################################################
 3 | # File Name: pad_to_np.py
 4 | # Author: Meng Zhao
 5 | # mail: @
 6 | # Created Time: 2019年04月02日 星期二 17时12分25秒
 7 | #=============================================================
 8 | import paddle.fluid as fluid
 9 | import joblib
10 | from model.ernie import ErnieConfig
11 | from utils.init import init_checkpoint, init_pretraining_params
12 | from finetune.classifier import create_model
13 | 
14 | 
15 | import numpy as np
16 | import os
17 | import argparse
18 | 
19 | 
20 | 
def _str2bool(value):
    """Parse a command-line boolean such as ``true``/``false`` or ``1``/``0``.

    ``type=bool`` must not be used with argparse: ``bool("False")`` is ``True``
    because any non-empty string is truthy.

    Args:
        value: Raw command-line string (or an actual bool).

    Returns:
        The parsed boolean.

    Raises:
        argparse.ArgumentTypeError: If ``value`` is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    # The empty string stays falsy, as it was with the former ``type=bool``.
    if text in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value, got %r" % (value,))


# Command-line options. `init_checkpoint` is used further down as the
# directory that is probed for one file per variable name.
parser = argparse.ArgumentParser()
parser.add_argument("--init_checkpoint", default='/root/zhaomeng/baidu_ERNIE/LARK/ERNIE/params', type=str, help=".")
parser.add_argument("--ernie_config_path", default='/root/zhaomeng/baidu_ERNIE/LARK/ERNIE/config/ernie_config.json', type=str, help=".")
parser.add_argument("--max_seq_len", default=128, type=int, help=".")
parser.add_argument("--num_labels", default=2, type=int, help=".")
parser.add_argument("--use_fp16", type=_str2bool, default=False, help="Whether to use fp16 mixed precision training.")


# Parsed at import time, as before, so `args` stays a module-level name.
args = parser.parse_args()
30 | 
31 | 
32 | 
33 | 
34 | 
if __name__ == '__main__':
    # Dump the ERNIE parameters found under `args.init_checkpoint` into a
    # joblib file ('params.dict') mapping variable name -> numpy array.
    if not args.init_checkpoint:
        raise ValueError("args 'init_checkpoint' should be set if "
                         "only doing validation or testing!")

    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    startup_prog = fluid.Program()
    test_program = fluid.Program()

    # Build the model only so that its variables exist in the two programs;
    # the values returned by create_model are not needed here.
    with fluid.program_guard(test_program, startup_prog):
        with fluid.unique_name.guard():
            _, _ = create_model(
                    args,
                    pyreader_name='test_reader',
                    ernie_config=ernie_config,
                    is_classify=True)

    exe.run(startup_prog)

    init_pretraining_params(
                    exe,
                    args.init_checkpoint,
                    main_program=test_program,
                    #main_program=startup_prog,
                    use_fp16=args.use_fp16)

    name2params = {}
    prefix = args.init_checkpoint
    for var in startup_prog.list_vars():
        # Only variables with a same-named file under the checkpoint
        # directory are exported.
        path = os.path.join(prefix, var.name)
        if not os.path.exists(path):
            continue
        cur_tensor = fluid.global_scope().find_var(var.name).get_tensor()
        # Convert once and reuse the array for both the log line and the dump.
        value = np.array(cur_tensor)
        print(var.name, value.shape)
        name2params[var.name] = value

    joblib.dump(name2params, 'params.dict')
77 | 
78 | 


--------------------------------------------------------------------------------