├── README.md ├── np_to_tf.py └── pad_to_np.py /README.md: -------------------------------------------------------------------------------- 1 | # tensorflow_ernie 2 | 将百度ernie的paddlepaddle模型转成tensorflow模型 3 | 4 | 5 | 6 | 2019年10月12日 更新:
7 | 在pad_to_np.py中调用create_model时增加参数is_classify=True,以适应新版本ernie代码 8 | 9 | 10 | 11 | 12 | 13 | 14 | 2019年9月25日 更新: 15 | 16 | 上传转换代码 17 | 步骤: 18 | 1. 将两个py文件放到ernie的根目录 19 | 2. 更改pad_to_np里的模型路径,然后python pad_to_np.py 20 | 3. python np_to_tf.py 21 | 22 | 23 | -------------------------------------------------- 24 | 2019年5月21日 更新: 25 | 链接:https://pan.baidu.com/s/1I7kKVlZN6hl-sUbnvttJzA 26 | 提取码:iq74 27 | -------------------------------------------------------------------------------- /np_to_tf.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | ################################################### 3 | # File Name: covert.py 4 | # Author: Meng Zhao 5 | # mail: @ 6 | # Created Time: 2019年04月01日 星期一 16时05分26秒 7 | #============================================================= 8 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 9 | # 10 | # Licensed under the Apache License, Version 2.0 (the "License"); 11 | # you may not use this file except in compliance with the License. 12 | # You may obtain a copy of the License at 13 | # 14 | # http://www.apache.org/licenses/LICENSE-2.0 15 | # 16 | # Unless required by applicable law or agreed to in writing, software 17 | # distributed under the License is distributed on an "AS IS" BASIS, 18 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19 | # See the License for the specific language governing permissions and 20 | # limitations under the License. 
21 | """Convert Google official BERT models to Fluid parameters.""" 22 | 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | import os 28 | import numpy as np 29 | import joblib 30 | import tensorflow as tf 31 | 32 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 33 | 34 | def define_variable(params, fluid_name='', tf_name=''): 35 | variable = tf.Variable(tf.convert_to_tensor(params[fluid_name]), name=tf_name) 36 | 37 | def np_to_tensor(params): 38 | tf_prefix = 'bert' 39 | 40 | #embeddings 41 | tf_embedding_prefix = tf_prefix + '/embeddings' 42 | define_variable(params, 'pre_encoder_layer_norm_scale', tf_embedding_prefix + '/LayerNorm/gamma') 43 | define_variable(params, 'pre_encoder_layer_norm_bias', tf_embedding_prefix + '/LayerNorm/beta') 44 | define_variable(params, 'pos_embedding', tf_embedding_prefix + '/position_embeddings') 45 | define_variable(params, 'word_embedding', tf_embedding_prefix + '/word_embeddings') 46 | define_variable(params, 'sent_embedding', tf_embedding_prefix + '/token_type_embeddings') 47 | 48 | #layers 49 | tf_encoder_prefix = tf_prefix + '/encoder/layer_' 50 | for i in range(12): 51 | fluid_prefix = "encoder_layer_" + str(i) 52 | define_variable(params, fluid_prefix + '_post_att_layer_norm_scale', tf_encoder_prefix + str(i) + '/attention/output/LayerNorm/gamma') 53 | define_variable(params, fluid_prefix + '_post_att_layer_norm_bias', tf_encoder_prefix + str(i) + '/attention/output/LayerNorm/beta') 54 | define_variable(params, fluid_prefix + '_multi_head_att_output_fc.w_0', tf_encoder_prefix + str(i) + '/attention/output/dense/kernel') 55 | define_variable(params, fluid_prefix + '_multi_head_att_output_fc.b_0', tf_encoder_prefix + str(i) + '/attention/output/dense/bias') 56 | define_variable(params, fluid_prefix + '_multi_head_att_key_fc.w_0', tf_encoder_prefix + str(i) + '/attention/self/key/kernel') 57 | define_variable(params, fluid_prefix + 
'_multi_head_att_key_fc.b_0', tf_encoder_prefix + str(i) + '/attention/self/key/bias') 58 | define_variable(params, fluid_prefix + '_multi_head_att_query_fc.w_0', tf_encoder_prefix + str(i) + '/attention/self/query/kernel') 59 | define_variable(params, fluid_prefix + '_multi_head_att_query_fc.b_0', tf_encoder_prefix + str(i) + '/attention/self/query/bias') 60 | define_variable(params, fluid_prefix + '_multi_head_att_value_fc.w_0', tf_encoder_prefix + str(i) + '/attention/self/value/kernel') 61 | define_variable(params, fluid_prefix + '_multi_head_att_value_fc.b_0', tf_encoder_prefix + str(i) + '/attention/self/value/bias') 62 | define_variable(params, fluid_prefix + '_ffn_fc_0.w_0', tf_encoder_prefix + str(i) + '/intermediate/dense/kernel') 63 | define_variable(params, fluid_prefix + '_ffn_fc_0.b_0', tf_encoder_prefix + str(i) + '/intermediate/dense/bias') 64 | define_variable(params, fluid_prefix + '_post_ffn_layer_norm_scale', tf_encoder_prefix + str(i) + '/output/LayerNorm/gamma') 65 | define_variable(params, fluid_prefix + '_post_ffn_layer_norm_bias', tf_encoder_prefix + str(i) + '/output/LayerNorm/beta') 66 | define_variable(params, fluid_prefix + '_ffn_fc_1.w_0', tf_encoder_prefix + str(i) + '/output/dense/kernel') 67 | define_variable(params, fluid_prefix + '_ffn_fc_1.b_0', tf_encoder_prefix + str(i) + '/output/dense/bias') 68 | 69 | #pooler 70 | tf_pooler_prefix = tf_prefix + '/pooler' 71 | define_variable(params, 'pooled_fc.w_0', tf_pooler_prefix + '/dense/kernel') 72 | define_variable(params, 'pooled_fc.b_0', tf_pooler_prefix + '/dense/bias') 73 | 74 | 75 | #cls 76 | #define_variable(params, 'mask_lm_out_fc.b_0', 'cls/predictions/output_bias') 77 | #define_variable(params, 'mask_lm_trans_layer_norm_scale', 'cls/predictions/transform/LayerNorm/gamma') 78 | #define_variable(params, 'mask_lm_trans_layer_norm_bias', 'cls/predictions/transform/LayerNorm/beta') 79 | #define_variable(params, 'mask_lm_trans_fc.w_0', 'cls/predictions/transform/dense/kernel') 80 | 
#define_variable(params, 'mask_lm_trans_fc.b_0', 'cls/predictions/transform/dense/bias') 81 | #define_variable(params, 'next_sent_fc.w_0', 'cls/seq_relationship/output_weights') 82 | #define_variable(params, 'next_sent_fc.b_0', 'cls/seq_relationship/output_bias') 83 | #define_variable(params, 'cls_squad_out_w', 'cls/squad/output_weights') 84 | #define_variable(params, 'cls_squad_out_b', 'cls/squad/output_bias') 85 | 86 | 87 | 88 | 89 | def covert(input_file): 90 | params = joblib.load(input_file) 91 | 92 | 93 | graph = tf.Graph() 94 | with graph.as_default(): 95 | #gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.4) 96 | #sess_config = tf.ConfigProto(gpu_options=gpu_options) 97 | #sess = tf.Session(sess_config) 98 | sess = tf.Session() 99 | np_to_tensor(params) 100 | 101 | 102 | saver = tf.train.Saver() 103 | sess.run(tf.global_variables_initializer()) 104 | with sess.as_default(): 105 | checkpoint_dir = 'checkpoints' 106 | checkpoint_prefix = os.path.join(checkpoint_dir, 'bert_model.ckpt') 107 | if not os.path.exists(checkpoint_dir): 108 | os.makedirs(checkpoint_dir) 109 | saver.save(sess, checkpoint_prefix) 110 | 111 | 112 | 113 | if __name__ == '__main__': 114 | covert('params.dict') 115 | pass 116 | -------------------------------------------------------------------------------- /pad_to_np.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | ################################################### 3 | # File Name: pad_to_np.py 4 | # Author: Meng Zhao 5 | # mail: @ 6 | # Created Time: 2019年04月02日 星期二 17时12分25秒 7 | #============================================================= 8 | import paddle.fluid as fluid 9 | import joblib 10 | from model.ernie import ErnieConfig 11 | from utils.init import init_checkpoint, init_pretraining_params 12 | from finetune.classifier import create_model 13 | 14 | 15 | import numpy as np 16 | import os 17 | import argparse 18 | 19 | 20 | 21 | parser = 
argparse.ArgumentParser() 22 | parser.add_argument("--init_checkpoint", default='/root/zhaomeng/baidu_ERNIE/LARK/ERNIE/params', type=str, help=".") 23 | parser.add_argument("--ernie_config_path", default='/root/zhaomeng/baidu_ERNIE/LARK/ERNIE/config/ernie_config.json', type=str, help=".") 24 | parser.add_argument("--max_seq_len", default=128, type=int, help=".") 25 | parser.add_argument("--num_labels", default=2, type=int, help=".") 26 | parser.add_argument("--use_fp16", type=bool, default=False, help="Whether to use fp16 mixed precision training.") 27 | 28 | 29 | args = parser.parse_args() 30 | 31 | 32 | 33 | 34 | 35 | if __name__ == '__main__': 36 | if not args.init_checkpoint: 37 | raise ValueError("args 'init_checkpoint' should be set if" 38 | "only doing validation or testing!") 39 | 40 | ernie_config = ErnieConfig(args.ernie_config_path) 41 | ernie_config.print_config() 42 | 43 | place = fluid.CPUPlace() 44 | exe = fluid.Executor(place) 45 | 46 | startup_prog = fluid.Program() 47 | test_program = fluid.Program() 48 | 49 | with fluid.program_guard(test_program, startup_prog): 50 | with fluid.unique_name.guard(): 51 | _, _ = create_model( 52 | args, 53 | pyreader_name='test_reader', 54 | ernie_config=ernie_config, 55 | is_classify=True) 56 | 57 | exe.run(startup_prog) 58 | 59 | 60 | init_pretraining_params( 61 | exe, 62 | args.init_checkpoint, 63 | main_program=test_program, 64 | #main_program=startup_prog, 65 | use_fp16=args.use_fp16) 66 | 67 | name2params = {} 68 | prefix = args.init_checkpoint 69 | for var in startup_prog.list_vars(): 70 | path = os.path.join(prefix, var.name) 71 | if os.path.exists(path): 72 | cur_tensor = fluid.global_scope().find_var(var.name).get_tensor() 73 | print(var.name, np.array(cur_tensor).shape) 74 | name2params[var.name] = np.array(cur_tensor) 75 | 76 | joblib.dump(name2params, 'params.dict') 77 | 78 | --------------------------------------------------------------------------------