├── README.md
├── _tensorflow
│   ├── dataset.py
│   ├── early_stop.py
│   ├── estimator.py
│   ├── hashtable.py
│   ├── serving
│   │   ├── serving-model
│   │   │   ├── batch.config
│   │   │   └── model.conf
│   │   ├── serving_model_save.py
│   │   └── warmup.py
│   ├── tensorflow-custom-op
│   │   ├── LICENSE
│   │   ├── MANIFEST.in
│   │   ├── Makefile
│   │   ├── README.md
│   │   ├── WORKSPACE
│   │   └── tensorflow_binary_code_hash
│   │       ├── BUILD
│   │       ├── __init__.py
│   │       ├── cc
│   │       │   ├── kernels
│   │       │   │   ├── binary_code_hash.h
│   │       │   │   ├── binary_code_hash_kernels.cc
│   │       │   │   ├── binary_code_hash_kernels.cu.cc
│   │       │   │   └── binary_code_hash_only_cpu_kernels.cc
│   │       │   └── ops
│   │       │       └── binary_code_hash_ops.cc
│   │       └── python
│   │           ├── __init__.py
│   │           └── ops
│   │               ├── __init__.py
│   │               ├── binary_code_hash_ops.py
│   │               └── binary_code_hash_test.py
│   └── tfrecord.py
├── embedding
│   ├── binary_code_hash_embedding
│   │   ├── LICENSE
│   │   ├── MANIFEST.in
│   │   ├── Makefile
│   │   ├── README.md
│   │   ├── WORKSPACE
│   │   ├── binary_code_hash_embedding.py
│   │   └── tensorflow_binary_code_hash
│   │       ├── BUILD
│   │       ├── __init__.py
│   │       ├── cc
│   │       │   ├── kernels
│   │       │   │   ├── binary_code_hash.h
│   │       │   │   ├── binary_code_hash_kernels.cc
│   │       │   │   ├── binary_code_hash_kernels.cu.cc
│   │       │   │   └── binary_code_hash_only_cpu_kernels.cc
│   │       │   └── ops
│   │       │       └── binary_code_hash_ops.cc
│   │       └── python
│   │           ├── __init__.py
│   │           └── ops
│   │               ├── __init__.py
│   │               ├── binary_code_hash_ops.py
│   │               └── binary_code_hash_test.py
│   └── qr_hash.py
├── llms
│   ├── README.md
│   ├── __init__.py
│   ├── langchain_tutorial
│   │   ├── __init__.py
│   │   ├── callbacks
│   │   │   ├── generic_llms_info.py
│   │   │   └── manager.py
│   │   ├── chatmodel.ipynb
│   │   ├── document_loaders.ipynb
│   │   ├── embeddings.ipynb
│   │   ├── examples
│   │   │   ├── mit.txt
│   │   │   ├── no_fields_name.csv
│   │   │   ├── rag.txt
│   │   │   ├── sql.md
│   │   │   ├── sql.pdf
│   │   │   └── test.csv
│   │   ├── llms.ipynb
│   │   ├── output_parser.ipynb
│   │   ├── prompt_templates.ipynb
│   │   ├── requirements.txt
│   │   ├── retrievers.ipynb
│   │   ├── text_splitter.ipynb
│   │   ├── tongyi
│   │   │   ├── chat_model.py
│   │   │   ├── embeddings.py
│   │   │   └── llm.py
│   │   ├── tools
│   │   │   ├── __init__.py
│   │   │   ├── function_calling.py
│   │   │   └── tools.py
│   │   ├── tools_agents.ipynb
│   │   └── vector_store.ipynb
│   └── train
│       ├── README.md
│       └── deepseek-train
│           ├── README.md
│           ├── grpo_train.py
│           ├── inference.py
│           ├── main.py
│           ├── requirements.txt
│           ├── reward.py
│           ├── sft_train.py
│           └── utils.py
├── multimodal
│   └── stable_diffusion
│       ├── README.md
│       ├── webui_colab.ipynb
│       ├── webui_kaggle.ipynb
│       └── 变形金刚风格.png
├── multitasklearning
│   ├── README.md
│   ├── loss
│   │   ├── dwa.py
│   │   ├── grad_norm.py
│   │   ├── pareto_efficient.py
│   │   └── uncertainty_weight.py
│   ├── mmoe.py
│   ├── ple.py
│   ├── shared_bottom.py
│   ├── stem.py
│   └── torch
│       ├── __init__.py
│       ├── mmoe.py
│       ├── ple.py
│       └── utils.py
├── nlp
│   ├── masked_language_model
│   │   ├── README.md
│   │   ├── data
│   │   │   ├── bert
│   │   │   │   ├── bert_config.json
│   │   │   │   └── vocab.txt
│   │   │   ├── example_no_sop.txt
│   │   │   ├── example_sop.txt
│   │   │   └── stopwords.txt
│   │   ├── pt
│   │   │   ├── config.py
│   │   │   ├── create_pretraining_data.py
│   │   │   ├── export.py
│   │   │   ├── model.py
│   │   │   ├── requirements.txt
│   │   │   ├── run_finetuning.py
│   │   │   ├── run_pretraining.py
│   │   │   └── tfrecord
│   │   │       ├── __init__.py
│   │   │       ├── example_pb2.py
│   │   │       ├── iterator_utils.py
│   │   │       ├── reader.py
│   │   │       ├── tools
│   │   │       │   ├── __init__.py
│   │   │       │   └── tfrecord2idx.py
│   │   │       ├── torch
│   │   │       │   ├── __init__.py
│   │   │       │   └── dataset.py
│   │   │       └── writer.py
│   │   └── tf
│   │       ├── config.py
│   │       ├── create_pretraining_data.py
│   │       ├── lamb_optimizer.py
│   │       ├── modeling.py
│   │       ├── optimization.py
│   │       ├── requirements.txt
│   │       ├── run_pretraining.py
│   │       ├── run_pretraining_sess.py
│   │       └── tokenization.py
│   └── sentence_bert
│       ├── __init__.py
│       ├── bert.py
│       ├── sbert
│       │   └── model.py
│       └── utils.py
├── recommendation
│   ├── README.md
│   ├── __init__.py
│   ├── cow_clip.py
│   ├── match
│   │   ├── comirec.py
│   │   └── mind.py
│   ├── multidomain
│   │   ├── README.md
│   │   ├── _unittest.py
│   │   ├── m2m.py
│   │   ├── pepnet.py
│   │   ├── sarnet.py
│   │   └── star.py
│   ├── rank
│   │   ├── README.md
│   │   ├── _unittest.py
│   │   ├── adaf2m2.py
│   │   ├── contextnet.py
│   │   ├── dcn.py
│   │   ├── deepcrossing.py
│   │   ├── deepfm.py
│   │   ├── dien.py
│   │   ├── din.py
│   │   ├── dsin.py
│   │   ├── fibinet.py
│   │   ├── fms.py
│   │   ├── fnn.py
│   │   ├── gwpfm.py
│   │   ├── hmoe.py
│   │   ├── interaction_expert.py
│   │   ├── masknet.py
│   │   ├── nfm.py
│   │   ├── pnn.py
│   │   ├── tin.py
│   │   └── xdeepfm.py
│   └── utils
│       ├── __init__.py
│       ├── core.py
│       ├── interaction.py
│       ├── losses.py
│       ├── rnn.py
│       ├── train_utils.py
│       ├── transformer.py
│       └── type_declaration.py
├── trick
│   ├── README.md
│   ├── ema.py
│   ├── ema_pt.py
│   ├── gradient_accumulation.py
│   ├── gradient_accumulation_pt.py
│   ├── hierarchical_lr.py
│   ├── hierarchical_lr_pt.py
│   ├── initialization.py
│   ├── swa.py
│   ├── swa_pt.py
│   └── unbalance
│       ├── loss.py
│       └── loss_pt.py
└── triton
    ├── client
    │   ├── _grpc.py
    │   └── _http.py
    └── gen_model
        ├── onnx_model.py
        ├── tensorflow_model.py
        ├── tensorrt_model.py
        └── torch_model.py

/_tensorflow/dataset.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np


d = np.arange(0,60).reshape([6, 10])

# Convert the array into a tensor dataset
data = tf.data.Dataset.from_tensor_slices(d)

# Sequentially take buffer_size samples from the dataset into a buffer, then shuffle the samples within the buffer.
# Whenever the buffer holds fewer than buffer_size samples, it is refilled in order from the dataset up to buffer_size,
# and the buffer is shuffled again.
data = data.shuffle(buffer_size=3)

# Draw 4 samples from the buffer each time
data = data.batch(4)

# Repeat the dataset, which effectively yields 2 epochs of data
data = data.repeat(2)

# Build the iterator that fetches the data
iters = data.make_one_shot_iterator()

# Fetch one batch of data from the iterator at a time
batch = iters.get_next()

sess = tf.Session()

sess.run(batch)
# Once the dataset has been fully traversed, any further fetch raises: OutOfRangeError

"""
In [21]: d
Out[21]:
array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]])
In [22]: sess.run(batch)
Out[22]:
array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]])

In [23]: sess.run(batch)
Out[23]:
array([[40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]])
"""

data = data.repeat(2)

data = data.shuffle(buffer_size=3)

data = data.batch(4)

"""
In [25]: sess.run(batch)
Out[25]:
array([[10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49]])

In [26]: sess.run(batch)
Out[26]:
array([[50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
       [ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39]])

In [27]: sess.run(batch)
Out[27]:
array([[10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49]])

"""
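Because fetching past the end raises OutOfRangeError, a typical consumption loop guards on that exception. A minimal sketch, assuming the TF1-style `data` pipeline and `sess` built above:

```python
import tensorflow as tf

# Drain the one-shot iterator until the dataset is exhausted.
iters = data.make_one_shot_iterator()
batch = iters.get_next()
try:
    while True:
        sess.run(batch)
except tf.errors.OutOfRangeError:
    pass  # both epochs fully consumed
```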
--------------------------------------------------------------------------------
/_tensorflow/early_stop.py:
--------------------------------------------------------------------------------
import tensorflow as tf

from estimator import model_fn, input_fn_bulider

# Save a checkpoint every given number of training steps
runConfig = tf.estimator.RunConfig(save_checkpoints_steps=10)

estimator = tf.estimator.Estimator(model_fn,
                                   model_dir='your_save_path',
                                   config=runConfig,
                                   params={'lr': 0.01})

# Define early stopping here.
# The early-stop check runs during eval, so metric_name refers to a metric on the eval dataset.
# max_steps_without_decrease: stop once the loss has failed to decrease this many times; one eval counts as one step.
early_stop = tf.estimator.experimental.stop_if_no_decrease_hook(estimator,
                                                                metric_name='loss',
                                                                max_steps_without_decrease=1,
                                                                run_every_steps=1,
                                                                run_every_secs=None)

logging_hook = tf.train.LoggingTensorHook(every_n_iter=1,
                                          tensors={'loss': 'loss:0'})

# Define the input pipeline for the training (train) phase
train_input_fn = input_fn_bulider('train.tfrecord', batch_size=1, is_training=True)
# Define the input pipeline for the validation (eval) phase
eval_input_fn = input_fn_bulider('eval.tfrecord', batch_size=1, is_training=False)

# Create a TrainSpec instance
train_spec = tf.estimator.TrainSpec(train_input_fn, max_steps=100,
                                    hooks=[logging_hook, early_stop])
# Create an EvalSpec instance
eval_spec = tf.estimator.EvalSpec(eval_input_fn)

# Flow: train --> eval --> early-stop check --> save checkpoint --> train
tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

--------------------------------------------------------------------------------
/_tensorflow/estimator.py:
--------------------------------------------------------------------------------
import tensorflow as tf


def model_fn(features, labels, mode, params):  # the first three parameters are required
    # features and labels are whatever the `input_fn` passes in
    # mode tells you whether you are currently in the training or testing phase
    # params holds the arguments passed when creating the `estimator` object
    lr = params['lr']
    try:
        init_checkpoint = params['init_checkpoint']
    except KeyError:
        init_checkpoint = None

    x = features['inputs']
    y = features['labels']

    ##################### Define your own network model here ###################
    pre = tf.layers.dense(x, 1)
    loss = tf.reduce_mean(tf.pow(pre - y, 2), name='loss')
    ##################### Define your own network model here ###################

    # You can load your pre-trained model here
    assignment_map = dict()
    if init_checkpoint:
        for var in tf.train.list_variables(init_checkpoint):  # holds the checkpoint's variable names and shapes
            assignment_map[var[0]] = var[0]
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    # Define what the training phase should do
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer(lr)
        train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
        output_spec = tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)

    # Define your test (validation) phase
    elif mode == tf.estimator.ModeKeys.EVAL:
        metrics = {'eval_loss': tf.metrics.mean_tensor(loss), "accuracy": tf.metrics.accuracy(labels, pre)}
        output_spec = tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics)

    # Define your prediction phase
    elif mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {'predictions': pre}
        output_spec = tf.estimator.EstimatorSpec(mode, predictions=predictions)

    else:
        raise TypeError

    return output_spec


def input_fn_bulider(inputs_file, batch_size, is_training):
    name_to_features = {'inputs': tf.FixedLenFeature([3],
                                                     tf.float32),
                        'labels': tf.FixedLenFeature([], tf.float32)}

    def input_fn(params):
        d = tf.data.TFRecordDataset(inputs_file)
        if is_training:
            d = d.repeat()
            d = d.shuffle(buffer_size=100)  # shuffle requires a buffer_size; 100 here is an arbitrary choice

        # map_and_batch simply combines map and batch
        d = d.apply(tf.contrib.data.map_and_batch(lambda x: tf.parse_single_example(x, name_to_features),
                                                  batch_size=batch_size))
        return d

    return input_fn


if __name__ == '__main__':
    # Set the logging verbosity; INFO is chosen so we get feedback from the model
    tf.logging.set_verbosity(tf.logging.INFO)
    # Here I specify how often to save the model and log the loss
    runConfig = tf.estimator.RunConfig(save_checkpoints_steps=1,
                                       log_step_count_steps=1)

    estimator = tf.estimator.Estimator(model_fn, model_dir='your_save_path',
                                       config=runConfig, params={'lr': 0.01})

    # log_step_count_steps only controls the logging of loss and global_step
    # We can also customize more outputs via tf.train.LoggingTensorHook
    # tensors is what to log: a dict whose key is the printed name and whose value is the name of the tensor to output
    logging_hook = tf.train.LoggingTensorHook(every_n_iter=1,
                                              tensors={'loss': 'loss:0'})

    # What estimator.train actually receives is a dataset object
    input_fn = input_fn_bulider('test.tfrecord', batch_size=1, is_training=True)
    estimator.train(input_fn, max_steps=1000, hooks=[logging_hook])

--------------------------------------------------------------------------------
/_tensorflow/hashtable.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import time
import numpy as np


def demo():
    """
    insert: insert key-value pairs
    export: export the hashtable
    lookup: look up by key
    remove: delete a key
    size: the hashtable's capacity
    :return:
    """
    keys = tf.placeholder(dtype=tf.string, shape=[None])
    values = tf.placeholder(dtype=tf.int64, shape=[None])
    # With multiple tables, each needs an explicit name; otherwise they all get the default name and overwrite each other on save/load
    table1 = tf.contrib.lookup.MutableHashTable(key_dtype=tf.string, value_dtype=tf.int64, default_value=-1,
                                                name="HashTable_1")
    table2 = tf.contrib.lookup.MutableHashTable(tf.string, tf.int64, -1)
    insert_table1 = table1.insert(keys, values)
    insert_table2 = table2.insert(keys, values)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(insert_table1, feed_dict={keys: ["a"], values: [1]})
        sess.run(insert_table2, feed_dict={keys: ["b"], values: [2]})
        print("table1:", sess.run(table1.export()))
        print("table2:", sess.run(table2.export()))
        saver.save(sess, "checkpoint/test")


def run():
    """
    Test a 500K-entry hashtable: the saved model size and the lookup speed
    :return:
    """
    size = 500000
    keys = tf.placeholder(dtype=tf.string, shape=[None])
    values = tf.placeholder(dtype=tf.int64, shape=[None])
    # With multiple tables, each needs an explicit name; otherwise they all get the default name and overwrite each other on save/load
    table1 = tf.contrib.lookup.MutableHashTable(key_dtype=tf.string, value_dtype=tf.int64, default_value=-1,
                                                name="tower/HashTable_1")
    insert_table1 = table1.insert(keys, values)
    lookup = table1.lookup(keys)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(insert_table1,
                 feed_dict={keys: ["id_" + str(i) for i in range(size)], values: list(range(size))})
        # print("table1:", sess.run(table1.export()))

        # lookup time: 0.007218122482299805
        # model size: 8.9M
        s1 = time.time()
        print(sess.run(lookup, feed_dict={keys: ["id_1", "id_100"]}))
        print(time.time() - s1)
        saver.save(sess, "checkpoint/test")


def test():
    """
    Test a 500K-entry hashtable: the saved model size and the lookup speed
    :return:
    """
    size = 500000
    keys = tf.placeholder(dtype=tf.string, shape=[None])
    values = tf.placeholder(dtype=tf.string, shape=[None])
    # With multiple tables, each needs an explicit name; otherwise they all get the default name and overwrite each other on save/load
    table1 = tf.contrib.lookup.MutableHashTable(key_dtype=tf.string, value_dtype=tf.string, default_value="",
                                                name="HashTable_1")
    insert_table1 = table1.insert(keys, values)
    lookup = table1.lookup(keys)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(insert_table1,
                 feed_dict={keys: ["id_" + str(i) for i in range(size)],
                            values: [np.array([i, i]).tostring() for i in range(size)]})
        # print("table1:", sess.run(table1.export()))

        # lookup time: 0.007218122482299805
        # model size: 8.9M
        s1 = time.time()
        print(sess.run(lookup, feed_dict={keys: ["id_1", "id_100"]}))
        print(time.time() - s1)
        saver.save(sess, "checkpoint/test")


def restore():
    # With multiple tables, each needs an explicit name; otherwise they all get the default name and overwrite each other on save/load
    # Table names are not affected by variable_scope
    with tf.variable_scope("tower"):
        table1 = tf.contrib.lookup.MutableHashTable(key_dtype=tf.string, value_dtype=tf.int64, default_value=-1,
                                                    name="tower/HashTable_1")
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, "checkpoint/test")
        print(sess.run(table1.lookup(["id_1", "id_100"])))


if __name__ == '__main__':
    run()
    # test()
    restore()

--------------------------------------------------------------------------------
/_tensorflow/serving/serving-model/batch.config:
--------------------------------------------------------------------------------
max_batch_size { value: 3 }
batch_timeout_micros { value: 5000000 }
max_enqueued_batches { value: 1000000 }
num_batch_threads { value: 8 }

--------------------------------------------------------------------------------
/_tensorflow/serving/serving-model/model.conf:
--------------------------------------------------------------------------------
model_config_list: {
    config:{
        name:"youtubednn",
        base_path:"/models/youtubednn",
        model_platform:"tensorflow",
        model_version_policy: {
            latest: {
                num_versions:1
            }
        }
    },
}

--------------------------------------------------------------------------------
/_tensorflow/serving/serving_model_save.py:
--------------------------------------------------------------------------------
"""
Save the computation graph in pb format, for tf-serving
"""
import tensorflow.compat.v1 as tf
# import tensorflow as tf

# needed on tf2, otherwise placeholder raises an error
tf.disable_eager_execution()

############# Define your model here ###########
x1 = tf.placeholder(tf.float32, [None, 2], name='x1')
inputs_id = tf.placeholder(tf.int32, [None, 2], name='x2')

out = tf.add(tf.multiply(x1, 0.5), 2)

embedding = tf.get_variable("embedding_table", shape=[100, 10])
pre = tf.nn.embedding_lookup(embedding, inputs_id)
############# Define your model here ###########

sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Convert the tensors into tensor_info
tensor_info_x1 = tf.saved_model.utils.build_tensor_info(x1)
tensor_info_inputs_id = tf.saved_model.utils.build_tensor_info(inputs_id)
tensor_info_out = tf.saved_model.utils.build_tensor_info(out)
tensor_info_pre = tf.saved_model.utils.build_tensor_info(pre)

# Create the SavedModelBuilder and specify the save path
builder = tf.saved_model.builder.SavedModelBuilder("serving-model/3")

"""
Request payloads:
{"instances": [{"x1": [1.0, 2.0, 5.0],"inputs_id": [1, 2, 3]}]}, in which case the signature can only be the default "serving_default"
{"inputs": {"x1": [1.0, 2.0, 5.0],"inputs_id": [1, 2, 3]}}
With a custom signature:
{"instances": [{"x1": [1.0, 2.0, 5.0],"inputs_id": [1, 2, 3]}], "signature_name": "my_signature"}

Response: {"predictions":[{"out":......., "pre":......}]}
"""
# Specify the inputs and outputs of the serving endpoint
prediction_signature = (
    tf.saved_model.signature_def_utils.build_signature_def(
        inputs={'x1': tensor_info_x1, "inputs_id": tensor_info_inputs_id},
        outputs={'out': tensor_info_out, "pre": tensor_info_pre},
        method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME))

# Define the signature
legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op')
builder.add_meta_graph_and_variables(
    sess, [tf.saved_model.tag_constants.SERVING],
    signature_def_map={
        # Use a custom signature: my_signature
        # 'my_signature':
        #     prediction_signature,

        # Use tensorflow's default signature: serving_default
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            prediction_signature,
    },
    legacy_init_op=legacy_init_op)

# Save the model
builder.save()

print('Done exporting!')

--------------------------------------------------------------------------------
/_tensorflow/serving/warmup.py:
--------------------------------------------------------------------------------
import os
import tensorflow as tf
from tensorflow_serving.apis import model_pb2
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_log_pb2


def main():
    serving_dir = "serving-model"
    version = "3"

    with tf.python_io.TFRecordWriter(
            os.path.join(serving_dir, version, "assets.extra/tf_serving_warmup_requests")) as writer:
        request = predict_pb2.PredictRequest(
            # note: "inception" is carried over from the TF-Serving docs example; it should match the served model name in model.conf (here: "youtubednn")
            model_spec=model_pb2.ModelSpec(name="inception", signature_name='serving_default'),
            inputs={"x1": tf.make_tensor_proto([[1.0, 2.0]], shape=[1, 2]),
                    "inputs_id": tf.make_tensor_proto([[1, 2]], shape=[1, 2])}
        )

        log = prediction_log_pb2.PredictionLog(
            predict_log=prediction_log_pb2.PredictLog(request=request))
        writer.write(log.SerializeToString())


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/_tensorflow/tensorflow-custom-op/MANIFEST.in:
--------------------------------------------------------------------------------
recursive-include tensorflow_binary_code_hash *.so

--------------------------------------------------------------------------------
/_tensorflow/tensorflow-custom-op/Makefile:
--------------------------------------------------------------------------------
CXX := g++
NVCC := nvcc
PYTHON_BIN_PATH = python

# Source files of the op to be compiled
BINARY_CODE_HASH_SRCS = tensorflow_binary_code_hash/cc/kernels/binary_code_hash_kernels.cc $(wildcard tensorflow_binary_code_hash/cc/kernels/*.h) $(wildcard tensorflow_binary_code_hash/cc/ops/*.cc)
BINARY_CODE_HASH_CPU_ONLY_SRCS = tensorflow_binary_code_hash/cc/kernels/binary_code_hash_only_cpu_kernels.cc $(wildcard tensorflow_binary_code_hash/cc/ops/*.cc)

# Locate tensorflow's C++ compile and link flags
TF_CFLAGS := $(shell $(PYTHON_BIN_PATH) -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))')
TF_LFLAGS := $(shell $(PYTHON_BIN_PATH) -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))')

# Newer tensorflow releases require a newer C++ standard,
# e.g. tensorflow 2.10 requires -std=c++17
CFLAGS = ${TF_CFLAGS} -fPIC -O2 -std=c++11
LDFLAGS = -shared ${TF_LFLAGS}

# Output locations of the compiled .so files
BINARY_CODE_HASH_GPU_ONLY_TARGET_LIB = tensorflow_binary_code_hash/python/ops/_binary_code_hash_ops.cu.o
BINARY_CODE_HASH_TARGET_LIB = tensorflow_binary_code_hash/python/ops/_binary_code_hash_ops.so
BINARY_CODE_HASH_CPU_ONLY_TARGET_LIB = tensorflow_binary_code_hash/python/ops/_binary_code_hash_cpu_ops.so

# Build targets: binary_code_hash op
binary_code_hash_gpu_only: $(BINARY_CODE_HASH_GPU_ONLY_TARGET_LIB)

$(BINARY_CODE_HASH_GPU_ONLY_TARGET_LIB): tensorflow_binary_code_hash/cc/kernels/binary_code_hash_kernels.cu.cc
	$(NVCC) -std=c++11 -c -o $@ $^ $(TF_CFLAGS) -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -DNDEBUG --expt-relaxed-constexpr

binary_code_hash_op: $(BINARY_CODE_HASH_TARGET_LIB)
$(BINARY_CODE_HASH_TARGET_LIB): $(BINARY_CODE_HASH_SRCS) $(BINARY_CODE_HASH_GPU_ONLY_TARGET_LIB)
	$(CXX) $(CFLAGS) -o $@ $^ ${LDFLAGS} -D GOOGLE_CUDA=1 -I/usr/local/cuda/targets/x86_64-linux/include -L/usr/local/cuda/targets/x86_64-linux/lib -lcudart

binary_code_hash_cpu_only: $(BINARY_CODE_HASH_CPU_ONLY_TARGET_LIB)

$(BINARY_CODE_HASH_CPU_ONLY_TARGET_LIB): $(BINARY_CODE_HASH_CPU_ONLY_SRCS)
	$(CXX) $(CFLAGS) -o $@ $^ ${LDFLAGS}

# Test invoking the op from Python
binary_code_hash_test: tensorflow_binary_code_hash/python/ops/binary_code_hash_ops_test.py tensorflow_binary_code_hash/python/ops/binary_code_hash_ops.py
	$(PYTHON_BIN_PATH) tensorflow_binary_code_hash/python/ops/binary_code_hash_ops_test.py

clean:
	rm -f $(BINARY_CODE_HASH_GPU_ONLY_TARGET_LIB) $(BINARY_CODE_HASH_TARGET_LIB) $(BINARY_CODE_HASH_CPU_ONLY_TARGET_LIB)

--------------------------------------------------------------------------------
/_tensorflow/tensorflow-custom-op/WORKSPACE:
--------------------------------------------------------------------------------
load("//tf:tf_configure.bzl", "tf_configure")
load("//gpu:cuda_configure.bzl", "cuda_configure")

tf_configure(name = "local_config_tf")

cuda_configure(name = "local_config_cuda")

--------------------------------------------------------------------------------
/_tensorflow/tensorflow-custom-op/tensorflow_binary_code_hash/BUILD:
--------------------------------------------------------------------------------
licenses(["notice"])  # Apache 2.0

package(default_visibility = ["//visibility:public"])
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda_is_configured", "if_cuda")

config_setting(
    name = "windows",
    constraint_values = ["@bazel_tools//platforms:windows"],
)

cc_library(
    name = "cuda",
    data = [
        "@local_config_cuda//cuda:cudart",
    ],
    linkopts = select({
        "@local_config_cuda//cuda:darwin": [
            "-Wl,-rpath,../local_config_cuda/cuda/lib",
            "-Wl,-rpath,../local_config_cuda/cuda/extras/CUPTI/lib",
        ],
        ":windows": [],
        "//conditions:default": [
            "-Wl,-rpath,../local_config_cuda/cuda/lib64",
            "-Wl,-rpath,../local_config_cuda/cuda/extras/CUPTI/lib64",
        ],
    }),
    deps = [
        "@local_config_cuda//cuda:cudart",
    ],
)

cc_library(
    name = "binary_code_hash_ops_gpu",
    srcs = ["cc/kernels/binary_code_hash.h", "cc/kernels/binary_code_hash_kernels.cu.cc"],
    deps = [
        "@local_config_tf//:libtensorflow_framework",
        "@local_config_tf//:tf_header_lib",
"@local_config_cuda//cuda:cuda_headers"]), 39 | alwayslink = 1, 40 | linkopts = [], 41 | copts = select({ 42 | ":windows": ["/D__CLANG_SUPPORT_DYN_ANNOTATION__", "/DEIGEN_MPL2_ONLY", "/DEIGEN_MAX_ALIGN_BYTES=64", "/DEIGEN_HAS_TYPE_TRAITS=0", "/DTF_USE_SNAPPY", "/showIncludes", "/MD", "/O2", "/DNDEBUG", "/w", "-DWIN32_LEAN_AND_MEAN", "-DNOGDI", "/d2ReducedOptimizeHugeFunctions", "/arch:AVX", "/std:c++14", "-DTENSORFLOW_MONOLITHIC_BUILD", "/DPLATFORM_WINDOWS", "/DEIGEN_HAS_C99_MATH", "/DTENSORFLOW_USE_EIGEN_THREADPOOL", "/DEIGEN_AVOID_STL_ARRAY", "/Iexternal/gemmlowp", "/wd4018", "/wd4577", "/DNOGDI", "/UTF_COMPILE_LIBRARY"], 43 | "//conditions:default": ["-pthread", "-std=c++11", "-D_GLIBCXX_USE_CXX11_ABI=0"], 44 | }) + if_cuda_is_configured(["-DTENSORFLOW_USE_NVCC=1", "-DGOOGLE_CUDA=1", "-x cuda", "-nvcc_options=relaxed-constexpr", "-nvcc_options=ftz=true"]), 45 | ) 46 | 47 | cc_binary( 48 | name = 'python/ops/_binary_code_hash_ops.so', 49 | srcs = [ 50 | "cc/kernels/binary_code_hash.h", 51 | "cc/kernels/binary_code_hash_kernels.cc", 52 | "cc/ops/binary_code_hash_ops.cc", 53 | ], 54 | linkshared = 1, 55 | features = select({ 56 | ":windows": ["windows_export_all_symbols"], 57 | "//conditions:default": [], 58 | }), 59 | deps = [ 60 | "@local_config_tf//:libtensorflow_framework", 61 | "@local_config_tf//:tf_header_lib", 62 | ] + if_cuda_is_configured([":binary_code_hash_ops_gpu"]), 63 | copts = select({ 64 | ":windows": ["/D__CLANG_SUPPORT_DYN_ANNOTATION__", "/DEIGEN_MPL2_ONLY", "/DEIGEN_MAX_ALIGN_BYTES=64", "/DEIGEN_HAS_TYPE_TRAITS=0", "/DTF_USE_SNAPPY", "/showIncludes", "/MD", "/O2", "/DNDEBUG", "/w", "-DWIN32_LEAN_AND_MEAN", "-DNOGDI", "/d2ReducedOptimizeHugeFunctions", "/arch:AVX", "/std:c++14", "-DTENSORFLOW_MONOLITHIC_BUILD", "/DPLATFORM_WINDOWS", "/DEIGEN_HAS_C99_MATH", "/DTENSORFLOW_USE_EIGEN_THREADPOOL", "/DEIGEN_AVOID_STL_ARRAY", "/Iexternal/gemmlowp", "/wd4018", "/wd4577", "/DNOGDI", "/UTF_COMPILE_LIBRARY"], 65 | "//conditions:default": ["-pthread", "-std=c++11", "-D_GLIBCXX_USE_CXX11_ABI=0"], 66 | }) + if_cuda_is_configured(["-DTENSORFLOW_USE_NVCC=1", "-DGOOGLE_CUDA=1", "-x cuda", "-nvcc_options=relaxed-constexpr", "-nvcc_options=ftz=true"]), 67 | ) 68 | 69 | py_library( 70 | name = "binary_code_hash_ops_py", 71 | srcs = ([ 72 | "python/ops/binary_code_hash_ops.py", 73 | ]), 74 | data = [ 75 | ":python/ops/_binary_code_hash_ops.so" 76 | ], 77 | srcs_version = "PY2AND3", 78 | ) 79 | 80 | py_test( 81 | name = "binary_code_hash_ops_py_test", 82 | srcs = [ 83 | "python/ops/binary_code_hash_ops_test.py" 84 | ], 85 | main = "python/ops/binary_code_hash_ops_test.py", 86 | deps = [ 87 | ":binary_code_hash_ops_py", 88 | ], 89 | srcs_version = "PY2AND3", 90 | ) 91 | 92 | py_library( 93 | name = "binary_code_hash_py", 94 | srcs = ([ 95 | "__init__.py", 96 | "python/__init__.py", 97 | "python/ops/__init__.py", 98 | ]), 99 | deps = [ 100 | ":binary_code_hash_ops_py" 101 | ], 102 | srcs_version = "PY2AND3", 103 | ) 104 | -------------------------------------------------------------------------------- /_tensorflow/tensorflow-custom-op/tensorflow_binary_code_hash/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TensorFlow custom op example."""

from __future__ import absolute_import

from tensorflow_binary_code_hash.python.ops.binary_code_hash_ops import binary_code_hash

--------------------------------------------------------------------------------
/_tensorflow/tensorflow-custom-op/tensorflow_binary_code_hash/cc/kernels/binary_code_hash.h:
--------------------------------------------------------------------------------
// kernel_example.h
#ifndef KERNEL_BINARY_CODE_HASH_H_
#define KERNEL_BINARY_CODE_HASH_H_

#include <string>

namespace tensorflow {

namespace functor {

template <typename Device, typename T>
struct BinaryCodeHashFunctor {
  void operator()(const Device& d, int size, const T* in, T* out, int length, int t, bool succession);
};

}  // namespace functor

}  // namespace tensorflow

#endif //KERNEL_BINARY_CODE_HASH_H_

--------------------------------------------------------------------------------
/_tensorflow/tensorflow-custom-op/tensorflow_binary_code_hash/cc/kernels/binary_code_hash_kernels.cu.cc:
--------------------------------------------------------------------------------
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#if GOOGLE_CUDA

#define EIGEN_USE_GPU

#include <string>
#include <iostream>
#include <vector>
#include "binary_code_hash.h"
#include "tensorflow/core/util/gpu_kernel_helper.h"

namespace tensorflow {
namespace functor {

typedef Eigen::GpuDevice GPUDevice;

// Define the CUDA kernel.
// Can't use the C++ std library here.
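// Annotation: how one integer id becomes block_num bucket ids in the kernel
// below. The low `length` bits of the id are split into blocks
// ("succession" = contiguous runs of t bits, "skip" = strided bits), and each
// block is re-encoded as an integer plus an offset
// start_index = n * 2^block_length.
// Worked example (succession, length=24, t=7, as in the Python test):
// block_num = (24 - 1) / 7 + 1 = 4 and block_length = 7, so each input id
// yields 4 outputs, with block n's output falling in [n * 128, n * 128 + 127];
// the blocks' bucket ranges therefore do not overlap.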
template <typename T>
__global__ void BinaryCodeHashCudaKernel(const int size, const T* in, T* out, int length, int t, bool succession) {
  int block_num;
  int block_length;
  if (succession){
    block_num = (length - 1) / t + 1;
    block_length = t;
  } else {
    block_num = t + 1;
    block_length = (length - 1) / block_num + 1;
  }

  int* binary_code = new int[length];
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < size;
       i += blockDim.x * gridDim.x) {
    // out[i] = 2 * ldg(in + i);
    // Convert into binary
    T num = ldg(in + i);
    for(int k=0; k<length; k++){
      if (num > 0){
        binary_code[k] = num % 2;
        num = num / 2;
      } else {
        binary_code[k] = 0;
      }
    }

    // Convert each block back to base 10
    if (succession){
      for (int n = 0; n < block_num; n++){
        T num = 0;
        T start_index = n * (1 << block_length);
        for (int m = 0; m < t; m++){
          if (n*t+m>=length){
            break;
          }
          if (binary_code[n*t+m] == 1){
            num += 1 << m;
          }
        }
        out[i*block_num+n] = num + start_index;
      }
    }else { // skip
      for (int n = 0; n < block_num; n++){
        T num = 0;
        T start_index = n * (1 << block_length);
        for (int m = n; m < length; m+=t+1){
          if (binary_code[m] == 1){
            num += 1 << m;
          }
        }
        out[i*block_num+n] = num + start_index;
      }
    }
  }
  delete[] binary_code;
}

// Define the GPU implementation that launches the CUDA kernel.
template <typename T>
struct BinaryCodeHashFunctor<GPUDevice, T> {
  void operator()(const GPUDevice& d, int size, const T* in, T* out, int length, int t, bool succession) {
    // std::cout << "@@@@@@ Running CUDA @@@@@@" << std::endl;
    // Launch the cuda kernel.
    //
    // See core/util/cuda_kernel_helper.h for example of computing
    // block count and thread_per_block count.
    int block_count = 1024;
    int thread_per_block = 20;
    BinaryCodeHashCudaKernel<T>
        <<<block_count, thread_per_block, 0, d.stream()>>>(size, in, out, length, t, succession);
  }
};

// Explicitly instantiate functors for the types of OpKernels registered.
template struct BinaryCodeHashFunctor<GPUDevice, int32>;
template struct BinaryCodeHashFunctor<GPUDevice, int64>;
}  // end namespace functor
}  // end namespace tensorflow

#endif  // GOOGLE_CUDA

--------------------------------------------------------------------------------
/_tensorflow/tensorflow-custom-op/tensorflow_binary_code_hash/cc/kernels/binary_code_hash_only_cpu_kernels.cc:
--------------------------------------------------------------------------------
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <string>
#include <iostream>
#include "tensorflow/core/framework/op_kernel.h"

using namespace tensorflow;

template <typename T>
class BinaryCodeHashOp : public OpKernel {
 public:
  explicit BinaryCodeHashOp(OpKernelConstruction* context) : OpKernel(context) {
    // Check the inputs
    OP_REQUIRES_OK(context, context->GetAttr("length", &length_));
    OP_REQUIRES_OK(context, context->GetAttr("t", &t_));
    OP_REQUIRES_OK(context, context->GetAttr("strategy", &strategy_));

    OP_REQUIRES(context, length_ > 0,
                errors::InvalidArgument("Need length > 0, got ", length_));
    OP_REQUIRES(context, t_ > 0,
                errors::InvalidArgument("Need t > 0, got ", t_));
    OP_REQUIRES(context, length_ >= t_,
                errors::InvalidArgument("Need length >= t, got length: ", length_, " and t: ", t_));

    // std::cout << "length: " << length_ << ", t: " << t_ << ", strategy: " << strategy_ << std::endl;
  }

  void Compute(OpKernelContext* context) override {
    // Grab the input tensor
    const Tensor& input_tensor = context->input(0);
    auto input = input_tensor.flat<T>();

    // OP_REQUIRES(context, TensorShapeUtils::IsVector(input_tensor.shape()),
    //             errors::InvalidArgument("BinaryCodeHash expects a 1-D vector."));

    // Create an output tensor
    int block_num;
    int block_length;
    if (strategy_ == "succession"){
      block_num = (length_ - 1) / t_ + 1;
      block_length = t_;
    } else {
      block_num = t_ + 1;
      block_length = (length_ - 1) / block_num + 1;
    }
    Tensor* output_tensor = NULL;
    // OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(),
    //                                                  &output_tensor));
    // tensorflow::TensorShape output_shape({input_tensor.shape().dim_size(0), block_num});
    tensorflow::TensorShape output_shape = input_tensor.shape();
    output_shape.AddDim(block_num);  // Add a new dimension
    OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output_tensor));
    auto output_flat = output_tensor->flat<T>();

    // Compute Binary Code Hash.
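    // Annotation for the loop below: the low length_ bits of each input are
    // first expanded into binary_code[] (least-significant bit first); each
    // block is then re-encoded as num + start_index, with
    // start_index = n * 2^block_length. Under "succession" each block reads
    // t_ contiguous bits (so num < 2^t_ and the bucket ranges stay disjoint),
    // while "skip" reads every (t_+1)-th bit starting at offset n.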
    const int N = input.size();
    // std::cout << "input size: " << N << ", output size: " << output_flat.size() << std::endl;
    for (int i = 0; i < N; i++) {
      // Convert into binary
      int binary_code[length_];
      T num = input(i);
      for(int k=0; k<length_; k++){
        if (num > 0){
          binary_code[k] = num % 2;
          num = num / 2;
        } else {
          binary_code[k] = 0;
        }
      }

      // Convert each block back to base 10
      if (strategy_ == "succession"){
        for (int n = 0; n < block_num; n++){
          T num = 0;
          T start_index = n * (1 << block_length);
          for (int m = 0; m < t_; m++){
            if (n*t_+m>=length_){
              break;
            }
            if (binary_code[n*t_+m] == 1){
              num += 1 << m;
            }
          }
          output_flat(i*block_num+n) = num + start_index;
        }
      }else { // skip
        for (int n = 0; n < block_num; n++){
          T num = 0;
          T start_index = n * (1 << block_length);
          for (int m = n; m < length_; m+=t_+1){
            if (binary_code[m] == 1){
              num += 1 << m;
            }
          }
          output_flat(i*block_num+n) = num + start_index;
        }
      }
    }
  }

 private:
  int length_;
  int t_;
  std::string strategy_;
};

// REGISTER_KERNEL_BUILDER(Name("BinaryCodeHash").Device(DEVICE_CPU).TypeConstraint<T>("T"), BinaryCodeHashOp<T>);
#define REGISTER_CPU(T) \
  REGISTER_KERNEL_BUILDER( \
      Name("BinaryCodeHash").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      BinaryCodeHashOp<T>);
REGISTER_CPU(int64);
REGISTER_CPU(int32);

--------------------------------------------------------------------------------
/_tensorflow/tensorflow-custom-op/tensorflow_binary_code_hash/cc/ops/binary_code_hash_ops.cc:
--------------------------------------------------------------------------------
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <string>
#include <vector>
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"

using namespace tensorflow;

REGISTER_OP("BinaryCodeHash")
    .Attr("T: {int64, int32}")
    .Input("hash_id: T")
    .Attr("length: int")
    .Attr("t: int")
    .Attr("strategy: {'succession', 'skip'}")
    .Output("bh_id: T")
    .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
      int length;
      int t;
      std::string strategy;
      c->GetAttr("length", &length);
      c->GetAttr("t", &t);
      c->GetAttr("strategy", &strategy);
      int block_num;
      if (strategy == "succession"){
        block_num = (length - 1) / t + 1;
      } else {
        block_num = t + 1;
      }

      // Get the shape of the input tensor
      shape_inference::ShapeHandle input_shape;
      TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), 1, &input_shape));

      // Get the rank of the input tensor
      int input_rank = c->Rank(input_shape);

      // Build the new shape list
      std::vector<shape_inference::DimensionHandle> output_shape;
      for (int i = 0; i < input_rank; ++i) {
        output_shape.push_back(c->Dim(input_shape, i));
      }

      // Append one extra dimension
      output_shape.push_back(c->MakeDim(block_num));

      // Convert output_shape into the output tensor's shape
      c->set_output(0, c->MakeShape(output_shape));

      return Status::OK();
    });

--------------------------------------------------------------------------------
/_tensorflow/tensorflow-custom-op/tensorflow_binary_code_hash/python/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/_tensorflow/tensorflow-custom-op/tensorflow_binary_code_hash/python/ops/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/_tensorflow/tensorflow-custom-op/tensorflow_binary_code_hash/python/ops/binary_code_hash_ops.py:
--------------------------------------------------------------------------------
# Copyright 2018 The Sonnet Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Use binary_code_hash ops in python."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.python.framework import load_library
from tensorflow.python.platform import resource_loader

try:
    binary_code_hash_ops = load_library.load_op_library(
        resource_loader.get_path_to_datafile('_binary_code_hash_ops.so'))
except Exception:  # fall back to the CPU-only build
    binary_code_hash_ops = load_library.load_op_library(
        resource_loader.get_path_to_datafile('_binary_code_hash_cpu_ops.so'))
binary_code_hash = binary_code_hash_ops.binary_code_hash

--------------------------------------------------------------------------------
/_tensorflow/tensorflow-custom-op/tensorflow_binary_code_hash/python/ops/binary_code_hash_test.py:
--------------------------------------------------------------------------------
# Copyright 2018 The Sonnet Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Tests for binary_code_hash ops."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from tensorflow.python.platform import test
try:
    from tensorflow_binary_code_hash.python.ops.binary_code_hash_ops import binary_code_hash
except ImportError:
    from binary_code_hash_ops import binary_code_hash


class BinaryCodeHashTest(test.TestCase):

    def testBinaryCodeHash(self):
        with self.test_session() as sess:

            if int(tf.__version__.split('.')[0]) == 1:  # tensorflow 1.x
                print(sess.run(binary_code_hash([9999, 16777216, 16777220, 16777300], length=24, t=7, strategy="succession")))
            else:  # tensorflow 2.x
                print(binary_code_hash([9999, 16777216, 16777220, 16777300], length=24, t=7, strategy="succession").numpy())


if __name__ == '__main__':
    test.main()

--------------------------------------------------------------------------------
/_tensorflow/tfrecord.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import collections
import numpy as np

inputs_1 = np.array([
    [[1, 2], [3, 4]],
    [[5, 6], [7, 8]]
], dtype=np.int32)  # the int32 dtype here must match the dtype used when parsing

inputs_2 = [
    [1.1, 2.2, 3.3],
    [4.4, 5.5, 6.6]
]
lables = [0, 1]


################################### Writing data to TFRecord ###################################

def create_int_feature(values):
    f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))  # note: this accepts a list, and only a one-dimensional one
    return f


def create_float_feature(values):
    f = tf.train.Feature(float_list=tf.train.FloatList(value=list(values)))
    return f


def create_bytes_feature(values):
    f = tf.train.Feature(bytes_list=tf.train.BytesList(value=[values]))
    return f


writer = tf.python_io.TFRecordWriter('test.tfrecord')  # test.tfrecord is the output file path

for i1, i2, l in zip(inputs_1, inputs_2, lables):
    features = collections.OrderedDict()  # create a new ordered dict
    # Multi-dimensional arrays must first be converted to bytes before being passed to Feature
    features['inputs_1'] = create_bytes_feature(i1.tostring())
    features['inputs_2'] = create_float_feature(i2)
    features['labels'] = create_int_feature([l])
    features['test'] = create_bytes_feature("--test--".encode('utf-8'))

    example = tf.train.Example(features=tf.train.Features(feature=features))

    writer.write(example.SerializeToString())
writer.close()


################################### Parsing TFRecord data ###################################

name_to_features = {
    "inputs_1": tf.FixedLenFeature([], tf.string),
    "inputs_2": tf.FixedLenFeature([3], tf.float32),  # the schema here must match what was written, otherwise parsing may fail
    "labels": tf.FixedLenFeature([], tf.int64),
    "test": tf.FixedLenFeature([], tf.string)
}


# You can also read multiple tfrecord files at once
files = tf.gfile.Glob('*.tfrecord')
# [file_name_1, file_name_2, .....]
d = tf.data.TFRecordDataset(files)

# d = tf.data.TFRecordDataset('test.tfrecord')
d = d.repeat()  # repeat without arguments repeats indefinitely
d = d.shuffle(buffer_size=2)
# map_and_batch simply combines map and batch
d = d.apply(tf.contrib.data.map_and_batch(
    lambda record: tf.parse_single_example(record, name_to_features),
    batch_size=1))

iters = d.make_one_shot_iterator()
batch = iters.get_next()

# Parsing a BytesList loses the shape information and you must restore it yourself, so the shape is usually written alongside the data
# The bytes must be decoded back into the original structure; tf.int32 here must match the dtype used at write time
inputs_1_batch = tf.decode_raw(batch['inputs_1'], tf.int32)  # tf.int32 must match the source data's dtype
inputs_1_batch = tf.reshape(inputs_1_batch, [-1, 2, 2])
# Since each fetch returns a batch of inputs_1, the shape is [-1, 2, 2]; the original shape was [2, 2]
inputs_2_batch = batch['inputs_2']
labels_batch = batch['labels']

# needs decode('utf-8')
test_str = batch['test']

sess = tf.Session()
# Now we can fetch one batch of data at a time
sess.run([inputs_1_batch, inputs_2_batch, labels_batch])

--------------------------------------------------------------------------------
/embedding/binary_code_hash_embedding/MANIFEST.in:
--------------------------------------------------------------------------------
recursive-include tensorflow_binary_code_hash *.so

--------------------------------------------------------------------------------
/embedding/binary_code_hash_embedding/Makefile:
--------------------------------------------------------------------------------
CXX := g++
NVCC := nvcc
PYTHON_BIN_PATH = python

# Source files of the op to be compiled
BINARY_CODE_HASH_SRCS = tensorflow_binary_code_hash/cc/kernels/binary_code_hash_kernels.cc $(wildcard tensorflow_binary_code_hash/cc/kernels/*.h) $(wildcard tensorflow_binary_code_hash/cc/ops/*.cc)
BINARY_CODE_HASH_CPU_ONLY_SRCS = tensorflow_binary_code_hash/cc/kernels/binary_code_hash_only_cpu_kernels.cc $(wildcard tensorflow_binary_code_hash/cc/ops/*.cc)

# Locate tensorflow's C++ compile and link flags
TF_CFLAGS := $(shell $(PYTHON_BIN_PATH) -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))')
TF_LFLAGS := $(shell $(PYTHON_BIN_PATH) -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))')

# Newer tensorflow releases require a newer C++ standard, e.g. tensorflow 2.10 requires -std=c++17
CFLAGS = ${TF_CFLAGS} -fPIC -O2 -std=c++11
LDFLAGS = -shared ${TF_LFLAGS}

# Output locations of the compiled .so files
BINARY_CODE_HASH_GPU_ONLY_TARGET_LIB = tensorflow_binary_code_hash/python/ops/_binary_code_hash_ops.cu.o
BINARY_CODE_HASH_TARGET_LIB = tensorflow_binary_code_hash/python/ops/_binary_code_hash_ops.so
BINARY_CODE_HASH_CPU_ONLY_TARGET_LIB = tensorflow_binary_code_hash/python/ops/_binary_code_hash_cpu_ops.so

# Build targets: binary_code_hash op
binary_code_hash_gpu_only: $(BINARY_CODE_HASH_GPU_ONLY_TARGET_LIB)

$(BINARY_CODE_HASH_GPU_ONLY_TARGET_LIB): tensorflow_binary_code_hash/cc/kernels/binary_code_hash_kernels.cu.cc
	$(NVCC) -std=c++11 -c -o $@ $^ $(TF_CFLAGS) -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -DNDEBUG --expt-relaxed-constexpr

binary_code_hash_op: $(BINARY_CODE_HASH_TARGET_LIB)
$(BINARY_CODE_HASH_TARGET_LIB): $(BINARY_CODE_HASH_SRCS) $(BINARY_CODE_HASH_GPU_ONLY_TARGET_LIB)
	$(CXX) $(CFLAGS) -o $@ $^ ${LDFLAGS} -D GOOGLE_CUDA=1 -I/usr/local/cuda/targets/x86_64-linux/include -L/usr/local/cuda/targets/x86_64-linux/lib -lcudart

binary_code_hash_cpu_only: $(BINARY_CODE_HASH_CPU_ONLY_TARGET_LIB)

$(BINARY_CODE_HASH_CPU_ONLY_TARGET_LIB): $(BINARY_CODE_HASH_CPU_ONLY_SRCS)
	$(CXX) $(CFLAGS) -o $@ $^ ${LDFLAGS}

clean:
	rm -f $(BINARY_CODE_HASH_GPU_ONLY_TARGET_LIB) $(BINARY_CODE_HASH_TARGET_LIB) $(BINARY_CODE_HASH_CPU_ONLY_TARGET_LIB)

--------------------------------------------------------------------------------
/embedding/binary_code_hash_embedding/README.md:
--------------------------------------------------------------------------------
# Binary Code Based Hash Embedding

The theory behind Binary Code Based Hash Embedding is explained in this [Zhihu column](https://zhuanlan.zhihu.com/p/670802301). The implementation consists of two parts:

1. The op implementing the binary-code hash encoding (built from tensorflow's official custom-op repository: [git](https://github.com/tensorflow/custom-op) | [tutorial](https://www.tensorflow.org/guide/create_op))

2. The concrete Python implementation of binary-code-based hash embedding: [binary_code_hash_embedding.py](https://github.com/QunBB/DeepLearning/blob/main/embedding/binary_code_hash_embedding/binary_code_hash_embedding.py)

## Custom op

### tensorflow environment

**On Ubuntu, a plain pip install has been verified to work; building from source is not required**

1. If your cuda and tensorflow versions are compatible, just install with pip

2. If your cuda and tensorflow versions are incompatible

   a. Create a new Python environment:

   `conda create -n <env_name> python=<version> cudatoolkit=<cuda_version> cudnn -c conda-forge`

   b. Use an existing Python environment:

   `conda install cudatoolkit=<cuda_version> cudnn -c conda-forge -n <env_name>`

   After completing the steps above, proceed with the pip install

3. Of course, you can still choose to build and install from source: https://www.tensorflow.org/install/source

### Building

```makefile
make clean
make binary_code_hash_op
```

After running this, the following .so files are generated: `tensorflow_binary_code_hash/python/ops/_binary_code_hash_ops.so`
`tensorflow_binary_code_hash/python/ops/_binary_code_hash_ops.cu.o`

If your machine has no GPU, you can skip the CUDA compilation and build only the CPU version

```makefile
make clean
make binary_code_hash_cpu_only
```

After running this, the following .so file is generated: `tensorflow_binary_code_hash/python/ops/_binary_code_hash_cpu_ops.so`

## Python implementation

See [binary_code_hash_embedding.py](https://github.com/QunBB/DeepLearning/blob/main/embedding/binary_code_hash_embedding/binary_code_hash_embedding.py)

The op is loaded as follows:

```python
import tensorflow as tf

# for GPU and CPU
binary_code_hash_ops = tf.load_op_library('./tensorflow_binary_code_hash/python/ops/_binary_code_hash_ops.so')

# for only CPU
# binary_code_hash_ops = tf.load_op_library('./tensorflow_binary_code_hash/python/ops/_binary_code_hash_cpu_ops.so')

binary_code_hash = binary_code_hash_ops.binary_code_hash
```

## Issues

1. In file included from tensorflow_time_two/cc/kernels/time_two_kernels.cu.cc:21:0: /usr/local/lib/python3.6/dist-packages/tensorflow/include/tensorflow/core/util/gpu_kernel_helper.h:22:10: fatal error: third_party/gpus/cuda/include/cuda_fp16.h: No such file or directory

   In a conda environment, the tensorflow C++ header files are located at `/envs/<env_name>/lib/pythonx.x/site-packages/tensorflow/include`.
   For tensorflow 1.x they live under tensorflow_core rather than tensorflow.

   Solution 1:

   **Copy the CUDA header files to the target directory.**

   ```shell
   mkdir -p /usr/local/lib/python3.6/dist-packages/tensorflow/include/third_party/gpus/cuda/include && cp -r /usr/local/cuda/targets/x86_64-linux/include/* /usr/local/lib/python3.6/dist-packages/tensorflow/include/third_party/gpus/cuda/include
   ```

   Solution 2:

   **Modify the CUDA header includes.**

   "tensorflow/include/tensorflow/core/util/gpu_kernel_helper.h"

   ```c++
   #include "third_party/gpus/cuda/include/cuda_fp16.h"
   ```

   Replace with

   ```c++
   #include "cuda_fp16.h"
   ```

   "tensorflow/include/tensorflow/core/util/gpu_device_functions.h"

   ```c++
   #include "third_party/gpus/cuda/include/cuComplex.h"
   #include "third_party/gpus/cuda/include/cuda.h"
   ```

   Replace with

   ```c++
   #include "cuComplex.h"
   #include "cuda.h"
   ```

2. tensorflow 2.x support

   For newer tensorflow versions, **a newer C++ standard must be specified in the [Makefile](https://github.com/QunBB/DeepLearning/blob/main/embedding/binary_code_hash_embedding/Makefile#L14)**, e.g. tensorflow 2.10 requires -std=c++17

3. tensorflow.python.framework.errors_impl.NotFoundError: dlopen(./tensorflow_binary_code_hash/python/ops/\_binary_code_hash_cpu_ops.so, 0x0006): Library not loaded: @rpath/libtensorflow_framework.2.dylib

   This error occurs when a Python script loads the op's .so file. It is usually caused by **the Python environment at runtime differing from the Python environment used at compile time**.

--------------------------------------------------------------------------------
/embedding/binary_code_hash_embedding/WORKSPACE:
--------------------------------------------------------------------------------
load("//tf:tf_configure.bzl", "tf_configure")
load("//gpu:cuda_configure.bzl", "cuda_configure")

tf_configure(name = "local_config_tf")

cuda_configure(name = "local_config_cuda")

--------------------------------------------------------------------------------
/embedding/binary_code_hash_embedding/tensorflow_binary_code_hash/BUILD:
--------------------------------------------------------------------------------
licenses(["notice"])  # Apache 2.0

package(default_visibility = ["//visibility:public"])
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda_is_configured", "if_cuda")

config_setting(
    name = "windows",
    constraint_values = ["@bazel_tools//platforms:windows"],
)

cc_library(
    name = "cuda",
    data = [
        "@local_config_cuda//cuda:cudart",
    ],
    linkopts = select({
        "@local_config_cuda//cuda:darwin": [
            "-Wl,-rpath,../local_config_cuda/cuda/lib",
            "-Wl,-rpath,../local_config_cuda/cuda/extras/CUPTI/lib",
        ],
        ":windows": [],
        "//conditions:default": [
            "-Wl,-rpath,../local_config_cuda/cuda/lib64",
            "-Wl,-rpath,../local_config_cuda/cuda/extras/CUPTI/lib64",
        ],
    }),
    deps = [
        "@local_config_cuda//cuda:cudart",
    ],
)

cc_library(
    name = "binary_code_hash_ops_gpu",
    srcs = ["cc/kernels/binary_code_hash.h", "cc/kernels/binary_code_hash_kernels.cu.cc"],
    deps = [
        "@local_config_tf//:libtensorflow_framework",
        "@local_config_tf//:tf_header_lib",
    ] + if_cuda_is_configured([":cuda", "@local_config_cuda//cuda:cuda_headers"]),
    alwayslink = 1,
    linkopts = [],
    copts = select({
        ":windows": ["/D__CLANG_SUPPORT_DYN_ANNOTATION__", "/DEIGEN_MPL2_ONLY", "/DEIGEN_MAX_ALIGN_BYTES=64", "/DEIGEN_HAS_TYPE_TRAITS=0", "/DTF_USE_SNAPPY", "/showIncludes", "/MD", "/O2", "/DNDEBUG", "/w", "-DWIN32_LEAN_AND_MEAN", "-DNOGDI", "/d2ReducedOptimizeHugeFunctions", "/arch:AVX", "/std:c++14", "-DTENSORFLOW_MONOLITHIC_BUILD", "/DPLATFORM_WINDOWS", "/DEIGEN_HAS_C99_MATH", "/DTENSORFLOW_USE_EIGEN_THREADPOOL", "/DEIGEN_AVOID_STL_ARRAY", "/Iexternal/gemmlowp", "/wd4018", "/wd4577", "/DNOGDI", "/UTF_COMPILE_LIBRARY"],
        "//conditions:default": ["-pthread", "-std=c++11", "-D_GLIBCXX_USE_CXX11_ABI=0"],
    }) + if_cuda_is_configured(["-DTENSORFLOW_USE_NVCC=1", "-DGOOGLE_CUDA=1", "-x cuda", "-nvcc_options=relaxed-constexpr", "-nvcc_options=ftz=true"]),
)

cc_binary(
    name = 'python/ops/_binary_code_hash_ops.so',
    srcs = [
        "cc/kernels/binary_code_hash.h",
        "cc/kernels/binary_code_hash_kernels.cc",
        "cc/ops/binary_code_hash_ops.cc",
    ],
    linkshared = 1,
    features = select({
        ":windows": ["windows_export_all_symbols"],
        "//conditions:default": [],
    }),
    deps = [
        "@local_config_tf//:libtensorflow_framework",
        "@local_config_tf//:tf_header_lib",
    ] + if_cuda_is_configured([":binary_code_hash_ops_gpu"]),
    copts = select({
"/DEIGEN_MPL2_ONLY", "/DEIGEN_MAX_ALIGN_BYTES=64", "/DEIGEN_HAS_TYPE_TRAITS=0", "/DTF_USE_SNAPPY", "/showIncludes", "/MD", "/O2", "/DNDEBUG", "/w", "-DWIN32_LEAN_AND_MEAN", "-DNOGDI", "/d2ReducedOptimizeHugeFunctions", "/arch:AVX", "/std:c++14", "-DTENSORFLOW_MONOLITHIC_BUILD", "/DPLATFORM_WINDOWS", "/DEIGEN_HAS_C99_MATH", "/DTENSORFLOW_USE_EIGEN_THREADPOOL", "/DEIGEN_AVOID_STL_ARRAY", "/Iexternal/gemmlowp", "/wd4018", "/wd4577", "/DNOGDI", "/UTF_COMPILE_LIBRARY"], 65 | "//conditions:default": ["-pthread", "-std=c++11", "-D_GLIBCXX_USE_CXX11_ABI=0"], 66 | }) + if_cuda_is_configured(["-DTENSORFLOW_USE_NVCC=1", "-DGOOGLE_CUDA=1", "-x cuda", "-nvcc_options=relaxed-constexpr", "-nvcc_options=ftz=true"]), 67 | ) 68 | 69 | py_library( 70 | name = "binary_code_hash_ops_py", 71 | srcs = ([ 72 | "python/ops/binary_code_hash_ops.py", 73 | ]), 74 | data = [ 75 | ":python/ops/_binary_code_hash_ops.so" 76 | ], 77 | srcs_version = "PY2AND3", 78 | ) 79 | 80 | py_test( 81 | name = "binary_code_hash_ops_py_test", 82 | srcs = [ 83 | "python/ops/binary_code_hash_ops_test.py" 84 | ], 85 | main = "python/ops/binary_code_hash_ops_test.py", 86 | deps = [ 87 | ":binary_code_hash_ops_py", 88 | ], 89 | srcs_version = "PY2AND3", 90 | ) 91 | 92 | py_library( 93 | name = "binary_code_hash_py", 94 | srcs = ([ 95 | "__init__.py", 96 | "python/__init__.py", 97 | "python/ops/__init__.py", 98 | ]), 99 | deps = [ 100 | ":binary_code_hash_ops_py" 101 | ], 102 | srcs_version = "PY2AND3", 103 | ) 104 | -------------------------------------------------------------------------------- /embedding/binary_code_hash_embedding/tensorflow_binary_code_hash/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """TensorFlow custom op example.""" 16 | 17 | from __future__ import absolute_import 18 | 19 | from tensorflow_binary_code_hash.python.ops.binary_code_hash_ops import binary_code_hash 20 | -------------------------------------------------------------------------------- /embedding/binary_code_hash_embedding/tensorflow_binary_code_hash/cc/kernels/binary_code_hash.h: -------------------------------------------------------------------------------- 1 | // binary_code_hash.h 2 | #ifndef KERNEL_BINARY_CODE_HASH_H_ 3 | #define KERNEL_BINARY_CODE_HASH_H_ 4 | 5 | #include <unsupported/Eigen/CXX11/Tensor> 6 | 7 | namespace tensorflow { 8 | 9 | namespace functor { 10 | 11 | template <typename Device, typename T> 12 | struct BinaryCodeHashFunctor { 13 | void operator()(const Device& d, int size, const T* in, T* out, int length, int t, bool succession); 14 | }; 15 | 16 | } // namespace functor 17 | 18 | } // namespace tensorflow 19 | 20 | #endif //KERNEL_BINARY_CODE_HASH_H_ 21 | -------------------------------------------------------------------------------- /embedding/binary_code_hash_embedding/tensorflow_binary_code_hash/cc/kernels/binary_code_hash_kernels.cu.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | ==============================================================================*/ 15 | 16 | #if GOOGLE_CUDA 17 | 18 | #define EIGEN_USE_GPU 19 | 20 | #include <string> 21 | #include <iostream> 22 | #include <vector> 23 | #include "binary_code_hash.h" 24 | #include "tensorflow/core/util/gpu_kernel_helper.h" 25 | 26 | namespace tensorflow { 27 | namespace functor { 28 | 29 | typedef Eigen::GpuDevice GPUDevice; 30 | 31 | // Define the CUDA kernel. 32 | // Can't use the C++ std library here.
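// A worked example of the decomposition implemented below (numbers derived
// from the kernel's own arithmetic, for illustration): with length=24 bits
// and t=7,
//   "succession": block_num = (24 - 1) / 7 + 1 = 4 blocks of block_length = 7
//                 consecutive bits each;
//   "skip":       block_num = 7 + 1 = 8 blocks of block_length = 3, where
//                 block n collects bits n, n + (t+1), n + 2*(t+1), ...
// Each block is then converted back to base 10 and offset by
// start_index = n * 2^block_length, so every block addresses its own bucket range.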
33 | template <typename T> 34 | __global__ void BinaryCodeHashCudaKernel(const int size, const T* in, T* out, int length, int t, bool succession) { 35 | int block_num; 36 | int block_length; 37 | if (succession){ 38 | block_num = (length - 1) / t + 1; 39 | block_length = t; 40 | } else { 41 | block_num = t + 1; 42 | block_length = (length - 1) / block_num + 1; 43 | } 44 | 45 | int* binary_code = new int[length]; 46 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; 47 | i += blockDim.x * gridDim.x) { 48 | // out[i] = 2 * ldg(in + i); 49 | // Convert into binary 50 | T num = ldg(in + i); 51 | for(int k=0; k<length; k++){ 52 | if (num > 0){ 53 | binary_code[k] = num % 2; 54 | num = num / 2; 55 | } else { 56 | binary_code[k] = 0; 57 | } 58 | } 59 | 60 | // Convert each block into base 10 61 | if (succession){ 62 | for (int n = 0; n < block_num; n++){ 63 | T num = 0; 64 | T start_index = n * (1 << block_length); 65 | for (int m = 0; m < t; m++){ 66 | if (n*t+m>=length){ 67 | break; 68 | } 69 | if (binary_code[n*t+m] == 1){ 70 | num += 1 << m; 71 | } 72 | } 73 | out[i*block_num+n] = num + start_index; 74 | } 75 | }else { // skip 76 | for (int n = 0; n < block_num; n++){ 77 | T num = 0; 78 | T start_index = n * (1 << block_length); 79 | for (int m = n; m < length; m+=t+1){ 80 | if (binary_code[m] == 1){ 81 | num += 1 << m; 82 | } 83 | } 84 | out[i*block_num+n] = num + start_index; 85 | } 86 | } 87 | } 88 | delete[] binary_code; 89 | } 90 | 91 | // Define the GPU implementation that launches the CUDA kernel. 92 | template <typename T> 93 | struct BinaryCodeHashFunctor<GPUDevice, T> { 94 | void operator()(const GPUDevice& d, int size, const T* in, T* out, int length, int t, bool succession) { 95 | // std::cout << "@@@@@@ Running CUDA @@@@@@" << std::endl; 96 | // Launch the cuda kernel. 97 | // 98 | // See core/util/cuda_kernel_helper.h for example of computing 99 | // block count and thread_per_block count. 100 | int block_count = 1024; 101 | int thread_per_block = 20; 102 | BinaryCodeHashCudaKernel<T> 103 | <<<block_count, thread_per_block, 0, d.stream()>>>(size, in, out, length, t, succession); 104 | } 105 | }; 106 | 107 | // Explicitly instantiate functors for the types of OpKernels registered. 108 | template struct BinaryCodeHashFunctor<GPUDevice, int32>; 109 | template struct BinaryCodeHashFunctor<GPUDevice, int64>; 110 | } // end namespace functor 111 | } // end namespace tensorflow 112 | 113 | #endif // GOOGLE_CUDA 114 | -------------------------------------------------------------------------------- /embedding/binary_code_hash_embedding/tensorflow_binary_code_hash/cc/kernels/binary_code_hash_only_cpu_kernels.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License.
14 | ==============================================================================*/ 15 | 16 | #include <string> 17 | #include <iostream> 18 | #include "tensorflow/core/framework/op_kernel.h" 19 | 20 | using namespace tensorflow; 21 | 22 | template <typename T> 23 | class BinaryCodeHashOp : public OpKernel { 24 | public: 25 | explicit BinaryCodeHashOp(OpKernelConstruction* context) : OpKernel(context) { 26 | // Check the inputs 27 | OP_REQUIRES_OK(context, context->GetAttr("length", &length_)); 28 | OP_REQUIRES_OK(context, context->GetAttr("t", &t_)); 29 | OP_REQUIRES_OK(context, context->GetAttr("strategy", &strategy_)); 30 | 31 | OP_REQUIRES(context, length_ > 0, 32 | errors::InvalidArgument("Need length > 0, got ", length_)); 33 | OP_REQUIRES(context, t_ > 0, 34 | errors::InvalidArgument("Need t > 0, got ", t_)); 35 | OP_REQUIRES(context, length_ >= t_, 36 | errors::InvalidArgument("Need length >= t, got length: ", length_, " and t: ", t_)); 37 | 38 | // std::cout << "length: " << length_ << ", t: " << t_ << ", strategy: " << strategy_ << std::endl; 39 | } 40 | 41 | void Compute(OpKernelContext* context) override { 42 | // Grab the input tensor 43 | const Tensor& input_tensor = context->input(0); 44 | auto input = input_tensor.flat<T>(); 45 | 46 | // OP_REQUIRES(context, TensorShapeUtils::IsVector(input_tensor.shape()), 47 | // errors::InvalidArgument("BinaryCodeHash expects a 1-D vector.")); 48 | 49 | // Create an output tensor 50 | int block_num; 51 | int block_length; 52 | if (strategy_ == "succession"){ 53 | block_num = (length_ - 1) / t_ + 1; 54 | block_length = t_; 55 | } else { 56 | block_num = t_ + 1; 57 | block_length = (length_ - 1) / block_num + 1; 58 | } 59 | Tensor* output_tensor = NULL; 60 | // OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(), 61 | // &output_tensor)); 62 | // tensorflow::TensorShape output_shape({input_tensor.shape().dim_size(0), block_num}); 63 | tensorflow::TensorShape output_shape = input_tensor.shape(); 64 | output_shape.AddDim(block_num); // Add New dimension 65 | OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output_tensor)); 66 | auto output_flat = output_tensor->flat<T>(); 67 | 68 | // Compute Binary Code Hash.
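// Shape walk-through (illustrative, derived from the allocation above): an
// input of shape [batch] yields an output of shape [batch, block_num]. With
// the settings used in the Python test (length=24, t=7, strategy="succession"),
// block_num = (24 - 1) / 7 + 1 = 4 and block_length = 7, so block n emits a
// bucket id inside [n * 128, n * 128 + 128), i.e. [0, 128), [128, 256),
// [256, 384), [384, 512).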
69 | const int N = input.size(); 70 | // std::cout << "input size: " << N << ", output size: " << output_flat.size() << std::endl; 71 | for (int i = 0; i < N; i++) { 72 | // Convert into binary 73 | int binary_code[length_]; 74 | T num = input(i); 75 | for(int k=0; k<length_; k++){ 76 | if (num > 0){ 77 | binary_code[k] = num % 2; 78 | num = num / 2; 79 | } else { 80 | binary_code[k] = 0; 81 | } 82 | } 83 | 84 | // Convert each block into base 10 85 | if (strategy_ == "succession"){ 86 | for (int n = 0; n < block_num; n++){ 87 | T num = 0; 88 | T start_index = n * (1 << block_length); 89 | for (int m = 0; m < t_; m++){ 90 | if (n*t_+m>=length_){ 91 | break; 92 | } 93 | if (binary_code[n*t_+m] == 1){ 94 | num += 1 << m; 95 | } 96 | } 97 | output_flat(i*block_num+n) = num + start_index; 98 | } 99 | }else { // skip 100 | for (int n = 0; n < block_num; n++){ 101 | T num = 0; 102 | T start_index = n * (1 << block_length); 103 | for (int m = n; m < length_; m+=t_+1){ 104 | if (binary_code[m] == 1){ 105 | num += 1 << m; 106 | } 107 | } 108 | output_flat(i*block_num+n) = num + start_index; 109 | } 110 | } 111 | } 112 | } 113 | 114 | private: 115 | int length_; 116 | int t_; 117 | std::string strategy_; 118 | }; 119 | 120 | // REGISTER_KERNEL_BUILDER(Name("BinaryCodeHash").Device(DEVICE_CPU).TypeConstraint<int64>("T"), BinaryCodeHashOp<int64>); 121 | #define REGISTER_CPU(T) \ 122 | REGISTER_KERNEL_BUILDER( \ 123 | Name("BinaryCodeHash").Device(DEVICE_CPU).TypeConstraint<T>("T"), \ 124 | BinaryCodeHashOp<T>); 125 | REGISTER_CPU(int64); 126 | REGISTER_CPU(int32); 127 | -------------------------------------------------------------------------------- /embedding/binary_code_hash_embedding/tensorflow_binary_code_hash/cc/ops/binary_code_hash_ops.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License.
14 | ==============================================================================*/ 15 | 16 | #include <string> 17 | #include <vector> 18 | #include "tensorflow/core/framework/op.h" 19 | #include "tensorflow/core/framework/shape_inference.h" 20 | 21 | using namespace tensorflow; 22 | 23 | REGISTER_OP("BinaryCodeHash") 24 | .Attr("T: {int64, int32}") 25 | .Input("hash_id: T") 26 | .Attr("length: int") 27 | .Attr("t: int") 28 | .Attr("strategy: {'succession', 'skip'}") 29 | .Output("bh_id: T") 30 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 31 | // c->set_output(0, c->input(0)); 32 | int length; 33 | int t; 34 | std::string strategy; 35 | c->GetAttr("length", &length); 36 | c->GetAttr("t", &t); 37 | c->GetAttr("strategy", &strategy); 38 | int block_num; 39 | if (strategy == "succession"){ 40 | block_num = (length - 1) / t + 1; 41 | } else { 42 | block_num = t + 1; 43 | } 44 | 45 | // Get the shape of the input tensor 46 | shape_inference::ShapeHandle input_shape; 47 | TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), 1, &input_shape)); 48 | 49 | // Get the rank of the input tensor 50 | int input_rank = c->Rank(input_shape); 51 | 52 | // Build the list holding the new shape 53 | std::vector<shape_inference::DimensionHandle> output_shape; 54 | for (int i = 0; i < input_rank; ++i) { 55 | output_shape.push_back(c->Dim(input_shape, i)); 56 | } 57 | 58 | // Append one extra dimension 59 | output_shape.push_back(c->MakeDim(block_num)); 60 | 61 | // Turn output_shape into the output tensor's shape 62 | c->set_output(0, c->MakeShape(output_shape)); 63 | 64 | // c->set_output(0, c->MakeShape({c->Dim(c->input(0), 0), c->MakeDim(-1)})); 65 | // c->set_output(0, c->MakeShape(output_shape)); 66 | return Status::OK(); 67 | }); 68 | -------------------------------------------------------------------------------- /embedding/binary_code_hash_embedding/tensorflow_binary_code_hash/python/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /embedding/binary_code_hash_embedding/tensorflow_binary_code_hash/python/ops/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /embedding/binary_code_hash_embedding/tensorflow_binary_code_hash/python/ops/binary_code_hash_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Sonnet Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ============================================================================ 15 | """Use binary_code_hash ops in python.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | from tensorflow.python.framework import load_library 22 | from tensorflow.python.platform import resource_loader 23 | 24 | binary_code_hash_ops = load_library.load_op_library( 25 | resource_loader.get_path_to_datafile('_binary_code_hash_ops.so')) 26 | binary_code_hash = binary_code_hash_ops.binary_code_hash 27 | -------------------------------------------------------------------------------- /embedding/binary_code_hash_embedding/tensorflow_binary_code_hash/python/ops/binary_code_hash_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Sonnet Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """Tests for binary_code_hash ops.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import tensorflow as tf 21 | 22 | from tensorflow.python.platform import test 23 | try: 24 | from tensorflow_binary_code_hash.python.ops.binary_code_hash_ops import binary_code_hash 25 | except ImportError: 26 | from binary_code_hash_ops import binary_code_hash 27 | 28 | 29 | class BinaryCodeHashTest(test.TestCase): 30 | 31 | def testBinaryCodeHash(self): 32 | with self.test_session() as sess: 33 | 34 | if int(tf.__version__.split('.')[0]) == 1: # tensorflow 1.x 35 | print(sess.run(binary_code_hash([9999, 16777216, 16777220, 16777300], length=24, t=7, strategy="succession"))) 36 | else: # tensorflow 2.x 37 | print(binary_code_hash([9999, 16777216, 16777220, 16777300], length=24, t=7, strategy="succession").numpy()) 38 | 39 | 40 | if __name__ == '__main__': 41 | test.main() 42 | -------------------------------------------------------------------------------- /embedding/qr_hash.py: -------------------------------------------------------------------------------- 1 | """ 2 | Paper: Compositional Embeddings Using Complementary Partitions for Memory-Efficient Recommendation Systems 3 | 4 | Link: https://arxiv.org/abs/1909.02107 5 | """ 6 | from typing import Optional, Union 7 | import tensorflow as tf 8 | 9 | 10 | class QRHashEmbedding: 11 | 12 | def __init__(self, 13 | dim: int, 14 | origin_num: int, 15 | remainder_num: int, 16 | compositional_type: str = 'multiply', 17 | hashing: bool = True, 18 | l2_reg: Optional[float] = None): 19 | """ 20 | 21 | :param dim: dimension of a single hash embedding 22 | :param origin_num: number of ids before the QR decomposition 23 | :param remainder_num: modulus of the mod (remainder) operation 24 | :param compositional_type: how the two hash embeddings are combined 25 | :param hashing: whether the inputs need to be hashed into ids first 26 | :param l2_reg: L2 regularization penalty on the embeddings 27 | """ 28 | self.origin_num = origin_num 29 | self.remainder_num = remainder_num 30 | 31 | quotient_num = (origin_num - 1) // remainder_num + 1  # ceil: hashed ids lie in [0, origin_num), so the largest quotient is (origin_num - 1) // remainder_num
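# QR trick in numbers (illustrative): with remainder_num=1000, hashed id 12345
# maps to quotient 12345 // 1000 = 12 and remainder 12345 % 1000 = 345, so its
# embedding is composed from quotient_embedding[12] and remainder_embedding[345].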
32 | self.quotient_embedding = tf.get_variable('quotient_embedding', 33 | shape=[quotient_num, dim], 34 | initializer=tf.random_uniform_initializer, 35 | regularizer=tf.contrib.layers.l2_regularizer(l2_reg) if l2_reg is not None else None) 36 | self.remainder_embedding = tf.get_variable('remainder_embedding', 37 | shape=[self.remainder_num, dim], 38 | dtype=tf.float32, 39 | initializer=tf.random_uniform_initializer, 40 | regularizer=tf.contrib.layers.l2_regularizer(l2_reg) if l2_reg is not None else None) 41 | 42 | self.compositional_type = compositional_type 43 | if compositional_type == 'multiply': 44 | self.compositional_func = tf.multiply 45 | elif compositional_type == 'concat': 46 | self.compositional_func = lambda *args: tf.concat(args, axis=-1) 47 | elif compositional_type == 'add': 48 | self.compositional_func = tf.add 49 | else: 50 | raise TypeError('Only support `compositional_type`: "multiply", "concat", "add"') 51 | 52 | self.hashing = hashing 53 | 54 | def __call__(self, 55 | inputs: Union[tf.Tensor, tf.sparse.SparseTensor], 56 | return_mask: bool): 57 | """ 58 | 59 | :param inputs: 60 | :param return_mask: whether to also return the SparseTensor mask 61 | :return: 62 | """ 63 | mask = None 64 | 65 | if not self.hashing and inputs.dtype not in (tf.int32, tf.int64): 66 | raise Exception('Inputs must be unique id with type `int32` or `int64` when not hashing') 67 | 68 | if self.hashing: 69 | inputs, mask = self.hash_function(inputs) 70 | else: 71 | if isinstance(inputs, tf.sparse.SparseTensor): 72 | # The default value -1 distinguishes the missing positions 73 | inputs = tf.sparse.to_dense(inputs, default_value=-1, validate_indices=True) 74 | mask = tf.cast(tf.math.not_equal(inputs, -1), tf.float32) 75 | # Reset to 0 for compatibility with the embedding lookup 76 | inputs = tf.maximum(inputs, 0) 77 | 78 | # QR Trick 79 | q_index = inputs // self.remainder_num 80 | r_index = inputs % self.remainder_num 81 | q_embs = tf.nn.embedding_lookup(self.quotient_embedding, q_index) 82 | r_embs = tf.nn.embedding_lookup(self.remainder_embedding, r_index) 83 | 84 | # compositional embeddings 85 | compositional_embs = self.compositional_func(q_embs, r_embs) 86 | if mask is not None: 87 | compositional_embs = tf.multiply(compositional_embs, tf.expand_dims(mask, axis=-1)) 88 | 89 | if return_mask: 90 | return compositional_embs, mask 91 | else: 92 | return compositional_embs 93 | 94 | def hash_function(self, inputs): 95 | mask = None 96 | if isinstance(inputs, tf.sparse.SparseTensor): 97 | if inputs.dtype != tf.string: 98 | inputs = tf.sparse.to_dense(inputs, default_value=-1, validate_indices=True, name=None) 99 | inputs = tf.strings.as_string(inputs) 100 | else: 101 | inputs = tf.sparse.to_dense(inputs, default_value='-1', validate_indices=True, name=None) 102 | mask = tf.cast(tf.math.not_equal(inputs, '-1'), tf.float32) 103 | else: 104 | if inputs.dtype != tf.string: 105 | inputs = tf.strings.as_string(inputs) 106 | 107 | hash_index = tf.strings.to_hash_bucket_fast(inputs, self.origin_num) 108 | 109 | return hash_index, mask 110 |
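# Usage sketch (added for illustration; the names and sizes are hypothetical,
# and a TF1.x graph/session environment is assumed):
#
#   qr = QRHashEmbedding(dim=16, origin_num=100000, remainder_num=1000)
#   ids = tf.constant([["user_1", "user_2"]])   # raw string ids, hashing=True
#   embs = qr(ids, return_mask=False)           # -> shape [1, 2, 16] for 'multiply'/'add'
#
# Compared with a full vocabulary table of origin_num * dim parameters, the two
# QR tables hold only (quotient_num + remainder_num) * dim parameters.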
-------------------------------------------------------------------------------- /llms/README.md: -------------------------------------------------------------------------------- 1 | # 1. LangChain 2 | 3 | An introductory tutorial for LangChain development 4 | 5 | ## 1.1 Model I/O 6 | 7 | - [prompt_templates.ipynb](https://github.com/QunBB/DeepLearning/tree/main/llms/langchain_tutorial/prompt_templates.ipynb) 8 | - [llms.ipynb](https://github.com/QunBB/DeepLearning/tree/main/llms/langchain_tutorial/llms.ipynb) 9 | - [chatmodel.ipynb](https://github.com/QunBB/DeepLearning/tree/main/llms/langchain_tutorial/chatmodel.ipynb) 10 | - [output_parser.ipynb](https://github.com/QunBB/DeepLearning/tree/main/llms/langchain_tutorial/output_parser.ipynb) 11 | 12 | ## 1.2 RAG/Retrieval 13 | 14 | - [document_loaders.ipynb](https://github.com/QunBB/DeepLearning/blob/main/llms/langchain_tutorial/document_loaders.ipynb) 15 | - [text_splitter.ipynb](https://github.com/QunBB/DeepLearning/blob/main/llms/langchain_tutorial/text_splitter.ipynb) 16 | - [embeddings.ipynb](https://github.com/QunBB/DeepLearning/blob/main/llms/langchain_tutorial/embeddings.ipynb) 17 | - [vector_store.ipynb](https://github.com/QunBB/DeepLearning/blob/main/llms/langchain_tutorial/vector_store.ipynb) 18 | - [retrievers.ipynb](https://github.com/QunBB/DeepLearning/blob/main/llms/langchain_tutorial/retrievers.ipynb) 19 | 20 | ## 1.3 Tools/Agents 21 | 22 | - [tools_agents.ipynb](https://github.com/QunBB/DeepLearning/blob/main/llms/langchain_tutorial/tools_agents.ipynb) 23 | 24 | # 2. LLM Training 25 | 26 | ## 2.1 Reproducing DeepSeek-R1's Chain of Thought 27 | 28 | - [SFT + GRPO fine-tuning](https://github.com/QunBB/DeepLearning/tree/main/llms/train/deepseek-train) 29 | -------------------------------------------------------------------------------- /llms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QunBB/DeepLearning/48ced0566108e0442422c817fcb7f51f4ca6548d/llms/__init__.py -------------------------------------------------------------------------------- /llms/langchain_tutorial/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QunBB/DeepLearning/48ced0566108e0442422c817fcb7f51f4ca6548d/llms/langchain_tutorial/__init__.py -------------------------------------------------------------------------------- /llms/langchain_tutorial/callbacks/manager.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import logging 4 | from contextlib import contextmanager 5 | from contextvars import ContextVar 6 | from typing import ( 7 | Generator, 8 | Optional, 9 | ) 10 | 11 | from langchain_core.tracers.context import register_configure_hook 12 | 13 | from .generic_llms_info import GenericLLMsCallbackHandler 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | generic_llms_callback_var: ContextVar[Optional[GenericLLMsCallbackHandler]] = ContextVar( 18 | "generic_llms_callback", default=None 19 | ) 20 | 21 | register_configure_hook(generic_llms_callback_var, True) 22 | 23 | 24 | @contextmanager 25 | def get_generic_llms_callback() -> Generator[GenericLLMsCallbackHandler, None, None]: 26 | """Get the CN LLM callback handler in a context manager, 27 | which conveniently exposes token and cost information. 28 | 29 | Returns: 30 | GenericLLMsCallbackHandler: The Generic callback handler. 31 | 32 | Example: 33 | >>> with get_generic_llms_callback() as cb: 34 | ... # Use the CN LLM callback handler, e.g.
Tongyi 35 | """ 36 | cb = GenericLLMsCallbackHandler() 37 | generic_llms_callback_var.set(cb) 38 | yield cb 39 | generic_llms_callback_var.set(None) 40 | -------------------------------------------------------------------------------- /llms/langchain_tutorial/examples/no_fields_name.csv: -------------------------------------------------------------------------------- 1 | 1,张三,本科 2 | 2,李四,硕士 -------------------------------------------------------------------------------- /llms/langchain_tutorial/examples/rag.txt: -------------------------------------------------------------------------------- 1 | 2024年普通高等学校招生全国统一考试(简称:2024年全国高考),是中华人民共和国合格的高中毕业生或具有同等学力的考生参加的选拔性考试 [1-2]。2024年报名人数1342万人,比2023年增加51万人 [21]。 2 | 3 | 4 | 2024年高考是黑龙江、甘肃、吉林、安徽、江西、贵州、广西7个省份(中国第四批高考综合改革省份)的第一届落地实施的新高考。 [3] 5 | 6 | 7 | 2024年高考全国统考于2024年6月7日开始举行,部分省份考试时间为2天,实行新高考的省份为3-4天 [29]。5月31日,2024年高考试卷从北京发往全国 [22]。6月5日,2024年高考举报电话已开通,教育部教育考试院的举报电话为:010-62790357 [53] [77]。 8 | 9 | 10 | 地区,报名时间 11 | 北京,2023年10月25日9时至28日17时(进城务工人员随迁子女申请时间为10月10日9时至11日17时) 12 | 上海,2023年10月16日-19日(每天8:00-21:00)、10月20日(8:00-16:00) 13 | 天津,2023年11月1日9时至7日17时 14 | 重庆,2023年10月24日-11月7日 15 | 河北,2023年10月30日09时至11月13日17时 16 | 山西,2023年11月5日8:00—10日18:00 17 | 内蒙古,2023年11月2日9:00至13日18:00 18 | 山东,2023年11月9日至15日(每天9:00—18:00) 19 | 江苏,2023年11月1日至3日(8:30-22:00);11月4日(8:30-17:00) 20 | 浙江,2023年11月1日9:00至10日17:00 21 | 江西,2023年11月1日9:00—7日17:00 22 | 福建,2023年10月25日至30日 23 | 安徽,2023年10月25日10:00至29日17:00 24 | 河南,艺术类为2023年11月1日9:00至5日17:00;非艺术类为11月8日9:00至23日17:00 25 | 湖南,2023年10月23日至31日 26 | 湖北,2023年11月8日-18日 27 | 四川,2023年10月14日至20日 28 | 云南,2023年11月5-15日 29 | 贵州,2023年11月1日00:00至10日24:00 30 | 西藏,2023年11月1日至12月1日 31 | 辽宁,2023年10月27日至10月31日 32 | 吉林,10月5日—10日(9:00—16:30) 33 | 黑龙江,10月9日至20日(含节假日),每日9时至21时 34 | 陕西,2023年11月1日至7日 35 | 甘肃,2023年11月1日至10日 36 | 宁夏,2023年11月1日至5日 37 | 青海,2023年12月1日—15日(含赴省外就读考生报名) 38 | 新疆,2023年12月15日前结束 39 | 海南,2023年11月16日至21日 40 | 广东,2023年11月1日至10日 41 | 广西,2023年10月23日至31日17:30 42 | 43 | 44 | 报名人数 45 | 2024年高考报名人数 46 | 地区,报名情况 47 | 全国,2024年全国高考报名人数达到1342万人,比去年增加了51万人。 [21] 48 | 北京,2024年北京市报名参加高考考生共计6.72万人,全市共设18个考区、105个考点、2066个考场、315个备用考场。 [48] 49 | 上海,约5.4万名考生参加考试。 [105] 50 | 重庆,2024年,重庆市除高职分类考试招生、保送生等已录取考生和体育单招、残疾单招等考生外,应参加统一高考的考生20.3万人,比去年增加0.9万人。全市共设置考区39个,考点106个,考场6861个。 [74] 51 | 52 | 53 | 一、在哪里可以了解高考成绩、志愿填报时间和方式、各高校招生计划、往年录取参考等志愿填报权威信息? 54 | 各省级教育行政部门或招生考试机构官方网站、微信公众号等权威渠道都会公布今年高考各阶段工作时间安排,包括高考成绩公布时间和查询方式、志愿填报时间,以及今年各高校招生计划、往年录取情况参考等权威信息。考生和家长要及时关注本地官方权威渠道发布的消息内容。 55 | 考生高考志愿是高校录取的重要依据,请广大考生务必按照省级招生考试机构相关要求按时完成志愿填报。前期,教育部已会同有关部门协调互联网平台对省级招生考试机构和高校的官方网站、微信公众号等进行了权威标识,请广大考生在信息查询时认准官方权威渠道,切勿轻信网络不实信息。 56 | 57 | 58 | 二、高考志愿填报咨询有哪些公共服务? 59 | 教育部高度重视高考志愿填报咨询服务工作,指导各地建立了招生考试机构、高校、中学多方面志愿填报咨询公共服务体系。在教育部层面,首次在“阳光高考平台”推出免费的阳光志愿信息服务系统,将海量数据系统集成,进行个性化匹配推荐,从专业、就业、职业等多方面帮助考生了解学校和专业。同时还将举办“高考志愿填报云咨询周”活动,组织各省级招生考试机构和高校通过文字问答、视频直播等方式,为全国考生和家长提供志愿填报咨询指导公益服务。在各地层面,地方招生考试机构将通过印发志愿填报指导材料、推出志愿填报参考信息服务系统等多种方式为考生提供填报志愿所需的必要信息和服务,包括今年高校招生计划、高校近年录取情况、志愿填报办法和招生录取政策、考生须知等,并通过电视(电台)政策宣讲、线上直播咨询等方式为考生解疑释惑。在学校层面,招生高校会组织开展线上线下咨询活动和在线直播等活动,解读学校招生章程、招生办法和往年录取参考信息,提供各类志愿填报咨询服务;中学会面向考生和家长进行志愿填报培训,及时提供相关部门和高校权威宣传解读资料、发布渠道、发布安排等信息,并组织教师为考生和家长提供针对性地指导服务。 60 | 考生可以通过所在地省级招生考试机构和各高校官方网站、官方微信公众号或编发的志愿填报指导材料等渠道查询所需参考信息,也可通过拨打当地招生考试机构、高校和中学开通的咨询电话或线上咨询等方式了解相关政策和信息。 61 | 62 | 63 | 三、高校招生章程有什么作用,如何查询? 64 | 高校招生章程由学校依据相关法律规定和国家招生政策制定,是学校开展招生工作的依据。考生在填报志愿前,应仔细查阅拟报考高校的招生章程,全面了解高校招生办法和相关招生要求。 65 | 主要查询途径有:中国高等教育学生信息网的“阳光高考”信息平台(https://gaokao.chsi.com.cn);各高校官方招生网站等。 66 | 67 | 68 | 四、高考招生有哪些投档录取模式? 
69 | 高考投档录取模式由各省级招生委员会确定,一般有两种模式:平行志愿投档录取和顺序志愿投档录取。 70 | 平行志愿投档录取模式,采用“分数(位次)优先、遵循志愿”的原则。先按照考生高考成绩从高到低进行排序,再依据考生填报的平行志愿顺序、结合高校招生计划和投档比例依次进行检索,当检索的志愿有投档缺额时即进行投档,投档成功后不再检索填报的后续平行志愿。在一轮完整的平行志愿检索投档过程中,每位考生最多只有一次被投出(投档成功)的机会。 71 | 顺序志愿投档录取模式,也称为梯度志愿投档录取模式,采用“志愿优先、遵循分数”的原则。即同一批次允许考生填报若干个按顺序排列的志愿,通常称为第一志愿、第二志愿、第三志愿等。在第一志愿投档时,凡第一志愿填报相同的考生,遵循从高分到低分按投档缺额进行投档;第一志愿投档录取完毕后,如所填报志愿有投档缺额,进行第二志愿投档录取。依此类推。 72 | 投档工作由省级招生委员会负责,考生电子档案投档到相关高校后,高校按照向社会公布的招生章程中的录取规则进行录取。 73 | 74 | 75 | 五、考生被投档后有退档风险吗,哪些因素会造成退档,被高校退档影响后续批次录取吗? 76 | 高校招生录取实行“学校负责、省级招办监督”机制。无论是平行志愿投档录取还是顺序志愿投档录取,考生的退档风险主要有两个。一是考生高考成绩没有达到所报专业录取分数且不服从专业调剂;二是考生存在不符合高校招生章程规定的其他要求。因此考生在填报志愿时一定要认真阅读拟报高校的招生章程,防止因不符合高校招生章程规定被退档。 77 | 被高校退档的考生还可参加本批次征集志愿投档录取(省级招生考试机构将高校未录满的计划再次公布补征志愿)或后续批次的投档录取。已被录取的考生,不能参加后续志愿投档录取。 78 | 79 | 80 | 六、填报志愿前要做哪些准备工作? 81 | 考生在志愿填报前要做到四个了解,做到“知己知彼”。一是了解国家需要。个人的成长与国家和社会的发展息息相关。拟报的学校和专业选择应充分考虑国家和社会需要,立志在服务国家和社会发展中体现个人成长价值。二是了解个人情况。适合的教育才是最好的教育。考生要综合考虑自己的高考成绩、兴趣特长、未来志向等因素,理性考虑意向学校。三是了解高校情况。了解意向高校近年录取分数(位次)情况、学校招生章程内容和该校在本省招生各个专业人才培养、就业前景等情况,新高考省份考生还要注意高校招生专业的选考科目要求。四是了解招生政策。了解本省(区、市)志愿填报办法、投档录取规则等信息。如有不清楚的地方,应及时向有关招生考试机构、高校和中学咨询。 82 | 83 | 84 | 七、填报志愿时的总体思路是什么? 85 | 应结合考生本人实际情况,综合分析各方面信息,科学选择,自主填报。 86 | 一是合理定位。知分、知线、知位次。成绩(位次)的高低,是考生填报志愿的基础因素,决定了可选择的高校和专业范围。考生要合理定位,理性选择志愿,不盲目攀高,也不妄自菲薄。 87 | 二是统筹考虑。考生应结合个人志向、兴趣爱好、特长优势、生涯规划等多种因素,着眼于国家需要和社会需求,综合考虑。高校专业没有所谓“冷热”,适合自己的才是最好的,考生和家长切忌盲从。 88 | 三是初选方案。按照“冲一冲、稳一稳、保一保”的思路,结合本省(区、市)可填报的志愿数量,参照意向高校近年录取情况,分冲刺志愿(往年录取位次略高)、稳健志愿(往年录取位次接近)、保底志愿(往年录取位次偏低)三个区间段选择相应的高校。同一区间段内几个高校应参照往年录取位次适当拉开距离,保持一定的梯度。要注意选好保底志愿,防止所有志愿偏高脱档。 89 | 四是优化完善。对初选志愿进行综合优化,根据意愿,合理排序,避免志愿顺序安排不合理带来的风险。平行志愿在投档检索时有先后顺序,考生在充分了解相关高校近几年录取位次及位次变化情况的基础上,结合自己的意愿合理安排志愿顺序。 90 | 五是正式填报。根据确定拟报考志愿的顺序,严格按照省级招生考试机构规定的志愿填报流程和操作要求,填报个人志愿信息。 91 | 92 | 93 | 八、填报志愿时有哪些注意事项? 94 | 一是牢记填报时间,尽早进行填报。考生务必在规定时间内填报志愿,填报志愿系统按照规定时间关闭,关闭后考生将不能再填报。建议考生尽早填报,尽量避免集中在截止时间临近前填报。   95 | 二是增强防范意识,管好个人信息。考生应妥善保管好自己的身份证号、考生号等个人信息和志愿填报系统密码等登录信息,不要泄露给他人,防止志愿被他人篡改。 96 | 三是勿信小道信息,谨防上当受骗。社会上一些机构或个人开展的各类志愿填报咨询活动存在政策解读不准确、信息提供不真实、费用收取不规范甚至诈骗等问题,提醒考生和家长不要轻信。此外,还存在有机构或个人以所谓的“权威专家”“内部信息”等名义开展虚假诈骗活动,提醒考生和家长提高警惕,严防被骗。 97 | 98 | 99 | 九、录取工作采用什么方式,一般什么时间开始? 100 | 高校招生实行计算机远程网上录取,各省(区、市)录取工作一般于7月上旬开始,8月底之前结束。 101 | 102 | 103 | 十、录取通知书何时能收到? 
104 | 高校一般会在录取结束后一周左右向录取新生寄发录取通知书。若考生在省级招生考试机构或高校官方网站上查询到了录取结果,一直没有收到录取通知书,可及时联系录取高校公布的招生咨询电话查询本人录取通知书邮寄情况。 -------------------------------------------------------------------------------- /llms/langchain_tutorial/examples/sql.md: -------------------------------------------------------------------------------- 1 | ## 创建表 2 | 3 | ```sql 4 | # 分区表 5 | create table test_t2(words string,frequency string) partitioned by (partdate string) row format delimited fields terminated by ','; 6 | 7 | # orc表 8 | CREATE TABLE IF NOT EXISTS bank.account_orc ( 9 | `id_card` int, 10 | `tran_time` string, 11 | `name` string, 12 | `cash` int 13 | ) 14 | stored as orc; 15 | ``` 16 | 17 | # 插入数据 18 | 19 | ```sql 20 | insert into tablename values('col1', 'col2'); 21 | 22 | 23 | INSERT INTO table_name (column1, column2, column3) 24 | VALUES 25 | (value1, value2, value3), 26 | (value4, value5, value6), 27 | (value7, value8, value9); 28 | 29 | 30 | INSERT OVERWRITE TABLE tb 31 | select * from tb2 32 | ; 33 | ``` -------------------------------------------------------------------------------- /llms/langchain_tutorial/examples/sql.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QunBB/DeepLearning/48ced0566108e0442422c817fcb7f51f4ca6548d/llms/langchain_tutorial/examples/sql.pdf -------------------------------------------------------------------------------- /llms/langchain_tutorial/examples/test.csv: -------------------------------------------------------------------------------- 1 | id,name,degree 2 | 1,张三,本科 3 | 2,李四,硕士 -------------------------------------------------------------------------------- /llms/langchain_tutorial/requirements.txt: -------------------------------------------------------------------------------- 1 | langchain==0.1.20 2 | # 科大讯飞 3 | websocket-client 4 | # 通义千问 5 | dashscope 6 | pypdf 7 | aiofiles 8 | # SemanticChunker 9 | langchain_experimental 10 | faiss-cpu 11 | rank_bm25 12 | langchain_chroma 13 | lark -------------------------------------------------------------------------------- /llms/langchain_tutorial/tongyi/embeddings.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional 2 | 3 | import logging 4 | import requests 5 | from langchain_core.embeddings import Embeddings 6 | from langchain_core.pydantic_v1 import BaseModel, SecretStr, root_validator 7 | from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env 8 | 9 | 10 | from typing import Generator, List 11 | import dashscope 12 | from http import HTTPStatus 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | EMBEDDING_MODELS = { 17 | "text_embedding_v1": dashscope.TextEmbedding.Models.text_embedding_v1, 18 | "text_embedding_v2": dashscope.TextEmbedding.Models.text_embedding_v2 19 | } 20 | # 最多支持25条,每条最长支持2048tokens 21 | DASHSCOPE_MAX_BATCH_SIZE = 25 22 | 23 | 24 | def batched(inputs: List, 25 | batch_size: int = DASHSCOPE_MAX_BATCH_SIZE) -> Generator[List, None, None]: 26 | for i in range(0, len(inputs), batch_size): 27 | yield inputs[i:i + batch_size] 28 | 29 | 30 | class TongyiEmbeddings(BaseModel, Embeddings): 31 | """Tongyi embedding models.""" 32 | 33 | model_name: str = "text_embedding_v1" 34 | dashscope_api_key: Optional[SecretStr] = None 35 | retry_count: int = 3 36 | 37 | @property 38 | def lc_secrets(self) -> Dict[str, str]: 39 | return {"dashscope_api_key": "DASHSCOPE_API_KEY"} 40 | 41 | @root_validator() 42 | def validate_environment(cls, values: 
Dict) -> Dict: 43 | """Validate that api key and python package exist in the environment.""" 44 | values["dashscope_api_key"] = convert_to_secret_str( 45 | get_from_dict_or_env(values, "dashscope_api_key", "DASHSCOPE_API_KEY") 46 | ) 47 | try: 48 | import dashscope 49 | except ImportError: 50 | raise ImportError( 51 | "Could not import dashscope python package. " 52 | "Please install it with `pip install dashscope --upgrade`." 53 | ) 54 | 55 | return values 56 | 57 | def _embed_retry(self, texts: List) -> Dict: 58 | embeddings = None 59 | for _ in range(self.retry_count): 60 | resp = dashscope.TextEmbedding.call( 61 | model=EMBEDDING_MODELS[self.model_name], 62 | input=texts) 63 | 64 | if resp.status_code != HTTPStatus.OK: 65 | logging.error(resp.message) 66 | continue 67 | 68 | embeddings = resp.output['embeddings'] 69 | break 70 | 71 | if embeddings is None: 72 | raise RuntimeError(f"TongyiEmbeddings failed after {self.retry_count} attempts") 73 | 74 | return embeddings 75 | 76 | def _embed(self, texts: List[str]) -> List[List[float]]: 77 | # Call Tongyi Embedding SDK 78 | result = None # merge the results. 79 | batch_counter = 0 80 | for batch in batched(texts): 81 | batch_emb = self._embed_retry(batch) 82 | if result is None: 83 | result = batch_emb 84 | else: 85 | for emb in batch_emb: 86 | emb['text_index'] += batch_counter 87 | result.append(emb) 88 | batch_counter += len(batch) 89 | 90 | # Sort resulting embeddings by index 91 | sorted_embeddings = sorted(result, key=lambda e: e["text_index"]) # type: ignore 92 | 93 | # Return just the embeddings 94 | return [result["embedding"] for result in sorted_embeddings] 95 | 96 | def embed_documents(self, texts: List[str]) -> List[List[float]]: 97 | """Call out to Tongyi's embedding endpoint. 98 | Args: 99 | texts: The list of texts to embed. 100 | Returns: 101 | List of embeddings, one for each text. 102 | """ 103 | return self._embed(texts) 104 | 105 | def embed_query(self, text: str) -> List[float]: 106 | """Call out to Tongyi's embedding endpoint. 107 | Args: 108 | text: The text to embed. 109 | Returns: 110 | Embeddings for the text. 111 | """ 112 | return self._embed([text])[0] 113 | -------------------------------------------------------------------------------- /llms/langchain_tutorial/tongyi/llm.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import asyncio 4 | import functools 5 | import logging 6 | from typing import ( 7 | Any, 8 | AsyncIterable, 9 | AsyncIterator, 10 | Callable, 11 | Dict, 12 | Iterable, 13 | Iterator, 14 | List, 15 | Mapping, 16 | Optional, 17 | Tuple, 18 | TypeVar, 19 | ) 20 | 21 | from langchain_core.callbacks import ( 22 | AsyncCallbackManagerForLLMRun, 23 | CallbackManagerForLLMRun, 24 | ) 25 | from langchain_core.language_models.llms import BaseLLM 26 | from langchain_core.outputs import Generation, GenerationChunk, LLMResult 27 | from langchain_core.pydantic_v1 import Field, root_validator 28 | from langchain_core.utils import get_from_dict_or_env 29 | from requests.exceptions import HTTPError 30 | from tenacity import ( 31 | before_sleep_log, 32 | retry, 33 | retry_if_exception_type, 34 | stop_after_attempt, 35 | wait_exponential, 36 | ) 37 | 38 | logger = logging.getLogger(__name__) 39 | T = TypeVar("T") 40 | 41 | from langchain.llms import Tongyi 42 | 43 | 44 | class CustomTongyi(Tongyi): 45 | """Add Features: 46 | 1.
Support tracking token usage when calling stream()""" 47 | 48 | def _generation_from_qwen_resp( 49 | self, resp: Any, is_last_chunk: bool = True 50 | ) -> Dict[str, Any]: 51 | # According to the response from dashscope, 52 | # each chunk's `generation_info` overwrites the previous one. 53 | # Besides, the `merge_dicts` method, 54 | # which is used to concatenate `generation_info` in `GenerationChunk`, 55 | # does not support merging of int type values. 56 | # Therefore, we adopt the `generation_info` of the last chunk 57 | # and discard the `generation_info` of the intermediate chunks. 58 | if is_last_chunk: 59 | return dict( 60 | text=resp["output"]["text"], 61 | generation_info=dict( 62 | finish_reason=resp["output"]["finish_reason"], 63 | request_id=resp["request_id"], 64 | token_usage=dict(resp["usage"]), 65 | model_name=self.model_name 66 | ), 67 | ) 68 | else: 69 | return dict(text=resp["output"]["text"]) 70 | -------------------------------------------------------------------------------- /llms/langchain_tutorial/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QunBB/DeepLearning/48ced0566108e0442422c817fcb7f51f4ca6548d/llms/langchain_tutorial/tools/__init__.py -------------------------------------------------------------------------------- /llms/langchain_tutorial/tools/function_calling.py: -------------------------------------------------------------------------------- 1 | from typing import ( 2 | Any, 3 | Callable, 4 | Dict, 5 | Type, 6 | Union 7 | ) 8 | 9 | from langchain_core.utils.function_calling import convert_to_openai_function 10 | from langchain_core.pydantic_v1 import BaseModel 11 | from langchain_core.tools import BaseTool 12 | 13 | 14 | def convert_to_openai_tool(function: Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool], 15 | ) -> Dict[str, Any]: 16 | function = convert_to_openai_function(function) 17 | 18 | tool = {"type": "function", 19 | "function": { 20 | "name": function["name"], 21 | "description": function["description"], 22 | "parameters": function["parameters"] 23 | }} 24 | 25 | if "required" in function: 26 | tool["function"]["required"] = function["required"]  # attach to the nested function spec, not the top-level dict 27 | 28 | return tool 29 | -------------------------------------------------------------------------------- /llms/langchain_tutorial/tools/tools.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Tuple, Union 2 | 3 | from langchain.tools import StructuredTool 4 | 5 | 6 | class ModifyStructuredTool(StructuredTool): 7 | def _parse_input( 8 | self, 9 | tool_input: Union[str, Dict], 10 | ) -> Union[str, Dict[str, Any]]: 11 | """Convert tool input to pydantic model.""" 12 | input_args = self.args_schema 13 | if isinstance(tool_input, str): 14 | if input_args is not None and input_args.__fields__: 15 | key_ = next(iter(input_args.__fields__.keys())) 16 | input_args.validate({key_: tool_input}) 17 | return tool_input 18 | else: 19 | if input_args is not None: 20 | result = input_args.parse_obj(tool_input) 21 | return { 22 | k: getattr(result, k) 23 | for k, v in result.dict().items() 24 | if k in tool_input 25 | } 26 | return tool_input 27 | 28 | def _to_args_and_kwargs(self, tool_input: Union[str, Dict]) -> Tuple[Tuple, Dict]: 29 | # For backwards compatibility, if run_input is a string, 30 | # pass as a positional argument.
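# e.g. (illustrative inputs): tool_input="beijing"       -> args=("beijing",), kwargs={}
#      tool_input={"city": "beijing"}                    -> args=(), kwargs={"city": "beijing"}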
31 | if self.args_schema is None or not self.args_schema.__fields__: 32 | return (), {} 33 | if isinstance(tool_input, str): 34 | return (tool_input,), {} 35 | else: 36 | return (), tool_input 37 | -------------------------------------------------------------------------------- /llms/train/README.md: -------------------------------------------------------------------------------- 1 | # Reproducing DeepSeek-R1's Chain of Thought 2 | 3 | Directory: [deepseek-train](https://github.com/QunBB/DeepLearning/tree/main/llms/train/deepseek-train) 4 | 5 | ## Environment Setup 6 | 7 | ```bash 8 | conda create -n vllm python=3.12 9 | 10 | conda activate vllm 11 | pip install vllm -U 12 | pip install trl -U 13 | 14 | # modelscope 15 | pip install addict modelscope 16 | ``` 17 | 18 | ## SFT + GRPO Training 19 | 20 | ```bash 21 | # sft 22 | python main.py --task=sft_train --model_name_or_path=Qwen/Qwen2.5-0.5B-Instruct --bf16 --checkpoint_dir=outputs/Qwen-0.5B-SFT-FirstHalf --per_device_train_batch_size=8 --save_strategy=epoch --epochs=1 23 | 24 | # grpo 25 | python main.py --task=grpo_train --model_name_or_path=outputs/Qwen-0.5B-SFT-FirstHalf/checkpoint-117 --bf16 --use_vllm --checkpoint_dir=outputs/Qwen-0.5B-GRPO-SecondHalf --per_device_train_batch_size=8 --save_strategy=epoch 26 | ``` 27 | 28 | ## Inference 29 | ```bash 30 | python main.py --task=inference --checkpoint_dir=outputs/Qwen-0.5B-GRPO-SecondHalf/checkpoint-934 31 | ``` 32 | 33 | ```text 34 | 请输入你的问题: 35 | Natalia sold clips to 22 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May? 36 | 37 | Assistant: 38 | <think> 39 | In April, Natalia sold clips to 22 friends. 40 | In May, she sold half as many clips as in April, which is 22/2 = <<22/2=11>>11 clips. 41 | Altogether, Natalia sold 22+11 = <<22+11=33>>33 clips in April and May. 42 | </think> 43 | <answer> 44 | 33 45 | </answer> 46 | ``` 47 | -------------------------------------------------------------------------------- /llms/train/deepseek-train/README.md: -------------------------------------------------------------------------------- 1 | # Environment Setup 2 | 3 | ```bash 4 | conda create -n vllm python=3.12 5 | 6 | conda activate vllm 7 | pip install vllm -U 8 | pip install trl -U 9 | 10 | # modelscope 11 | pip install addict modelscope 12 | ``` 13 | 14 | # SFT + GRPO Training 15 | 16 | ```bash 17 | # sft 18 | python main.py --task=sft_train --model_name_or_path=Qwen/Qwen2.5-0.5B-Instruct --bf16 --checkpoint_dir=outputs/Qwen-0.5B-SFT-FirstHalf --per_device_train_batch_size=8 --save_strategy=epoch --epochs=1 19 | 20 | # grpo 21 | python main.py --task=grpo_train --model_name_or_path=outputs/Qwen-0.5B-SFT-FirstHalf/checkpoint-117 --bf16 --use_vllm --checkpoint_dir=outputs/Qwen-0.5B-GRPO-SecondHalf --per_device_train_batch_size=8 --save_strategy=epoch 22 | ``` 23 | 24 | # Inference 25 | ```bash 26 | python main.py --task=inference --checkpoint_dir=outputs/Qwen-0.5B-GRPO-SecondHalf/checkpoint-934 27 | ``` 28 | 29 | ```text 30 | 请输入你的问题: 31 | Natalia sold clips to 22 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May? 32 | 33 | Assistant: 34 | <think> 35 | In April, Natalia sold clips to 22 friends. 36 | In May, she sold half as many clips as in April, which is 22/2 = <<22/2=11>>11 clips. 37 | Altogether, Natalia sold 22+11 = <<22+11=33>>33 clips in April and May.
38 | </think> 39 | <answer> 40 | 33 41 | </answer> 42 | ``` 43 | -------------------------------------------------------------------------------- /llms/train/deepseek-train/grpo_train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from modelscope import AutoTokenizer, AutoModelForCausalLM 3 | from trl import GRPOConfig, GRPOTrainer 4 | 5 | from utils import get_gsm8k_dataset 6 | from reward import REWARD_FUNCS 7 | 8 | 9 | def train(args): 10 | 11 | training_args = GRPOConfig( 12 | output_dir=args.checkpoint_dir, 13 | learning_rate=args.learning_rate, 14 | adam_beta1=args.adam_beta1, 15 | adam_beta2=args.adam_beta2, 16 | weight_decay=args.weight_decay, 17 | warmup_ratio=args.warmup_ratio, 18 | lr_scheduler_type=args.lr_scheduler_type, 19 | logging_steps=args.logging_steps, 20 | bf16=args.bf16, 21 | per_device_train_batch_size=args.per_device_train_batch_size, 22 | gradient_accumulation_steps=args.gradient_accumulation_steps, 23 | num_generations=args.num_generations, 24 | max_prompt_length=args.max_prompt_length, 25 | max_completion_length=args.max_completion_length, 26 | num_train_epochs=args.epochs, 27 | save_steps=args.save_steps, 28 | save_strategy=args.save_strategy, 29 | max_grad_norm=args.max_grad_norm, 30 | log_on_each_node=False, 31 | use_vllm=args.use_vllm, 32 | vllm_device=args.vllm_device, 33 | vllm_gpu_memory_utilization=args.vllm_gpu_ratio, 34 | report_to="none" 35 | ) 36 | 37 | model = AutoModelForCausalLM.from_pretrained( 38 | args.model_name_or_path, 39 | torch_dtype=torch.bfloat16 if args.bf16 else None, 40 | device_map=None, 41 | cache_dir=args.cache_dir 42 | ).to("cuda") 43 | 44 | tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path, cache_dir=args.cache_dir) 45 | tokenizer.pad_token = tokenizer.eos_token 46 | 47 | reward_funcs = [REWARD_FUNCS[func.strip()] for func in args.reward_funcs.split(',')] 48 | 49 | trainer = GRPOTrainer( 50 | model=model, 51 | processing_class=tokenizer, 52 | reward_funcs=reward_funcs, 53 | args=training_args, 54 | train_dataset=get_gsm8k_dataset(cache_dir=args.cache_dir, 55 | first_half=args.split_half == "first_half", 56 | second_half=args.split_half == "second_half"), 57 | ) 58 | trainer.train() 59 | -------------------------------------------------------------------------------- /llms/train/deepseek-train/inference.py: -------------------------------------------------------------------------------- 1 | from modelscope import AutoModelForCausalLM, AutoTokenizer 2 | 3 | from utils import SYSTEM_PROMPT 4 | 5 | 6 | def infer(args): 7 | 8 | model = AutoModelForCausalLM.from_pretrained( 9 | args.checkpoint_dir, 10 | torch_dtype="auto", 11 | device_map="auto" 12 | ) 13 | tokenizer = AutoTokenizer.from_pretrained(args.checkpoint_dir) 14 | 15 | # prompt = "Xiao Ming bought 4 apples, ate 1, and gave 1 to his sister. How many apples were left?"
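# Note (added for clarity): apply_chat_template below renders the messages into
# the model's plain-text prompt. For Qwen2.5-style chat models this is roughly
# "<|im_start|>system\n{SYSTEM_PROMPT}<|im_end|>\n<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant\n"
# (illustrative; the exact markers come from the checkpoint's tokenizer config).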
16 | while True: 17 | print("请输入你的问题:") 18 | prompt = input() 19 | 20 | if prompt in ("exit", "bye"): 21 | print("Assistant: 再见👋") 22 | break 23 | 24 | messages = [ 25 | {"role": "system", "content": SYSTEM_PROMPT}, 26 | {"role": "user", "content": prompt} 27 | ] 28 | text = tokenizer.apply_chat_template( 29 | messages, 30 | tokenize=False, 31 | add_generation_prompt=True 32 | ) 33 | model_inputs = tokenizer([text], return_tensors="pt").to(model.device) 34 | 35 | generated_ids = model.generate( 36 | **model_inputs, 37 | max_new_tokens=args.max_completion_length 38 | ) 39 | generated_ids = [  # keep only the newly generated tokens, dropping the echoed prompt 40 | output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids) 41 | ] 42 | 43 | response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0] 44 | print(f"Assistant:\n{response}") 45 | -------------------------------------------------------------------------------- /llms/train/deepseek-train/requirements.txt: -------------------------------------------------------------------------------- 1 | vllm 2 | trl 3 | addict 4 | modelscope 5 | -------------------------------------------------------------------------------- /llms/train/deepseek-train/reward.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | 4 | def extract_xml_answer(text: str) -> str: 5 | answer = text.split("<answer>")[-1] 6 | answer = answer.split("</answer>")[0] 7 | return answer.strip() 8 | 9 | 10 | def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[float]: 11 | """Check whether the answer extracted from the LLM output is exactly correct""" 12 | responses = [completion[0]['content'] for completion in completions] 13 | extracted_responses = [extract_xml_answer(r) for r in responses] 14 | 15 | q = prompts[0][-1]['content'] 16 | print('-' * 20, f"Question:\n{q}", f"\nAnswer:\n{answer[0]}", f"\nResponse:\n{responses[0]}", 17 | f"\nExtracted:\n{extracted_responses[0]}") 18 | 19 | return [2.0 if r == a else 0.0 for r, a in zip(extracted_responses, answer)] 20 | 21 | 22 | def int_reward_func(completions, **kwargs) -> list[float]: 23 | """The gsm8k answers are all integers, so check whether the LLM's answer is an integer""" 24 | responses = [completion[0]['content'] for completion in completions] 25 | extracted_responses = [extract_xml_answer(r) for r in responses] 26 | return [0.5 if r.isdigit() else 0.0 for r in extracted_responses] 27 | 28 | 29 | def strict_format_reward_func(completions, **kwargs) -> list[float]: 30 | """Check whether the LLM output follows the chain-of-thought format exactly""" 31 | pattern = r"^<think>.*?</think>\s*<answer>.*?</answer>\n?$" 32 | responses = [completion[0]["content"] for completion in completions] 33 | matches = [re.match(pattern, r, re.DOTALL) for r in responses] 34 | return [0.5 if match else 0.0 for match in matches] 35 | 36 | 37 | def soft_format_reward_func(completions, **kwargs) -> list[float]: 38 | """Check whether some part of the LLM output matches the chain-of-thought format""" 39 | pattern = r"<think>.*?</think>.*<answer>.*?</answer>"
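# Example (illustrative): with re.match, both patterns must match from the start
# of the completion, but the soft pattern above tolerates extra text after
# </answer>, e.g. "<think>steps</think><answer>42</answer> bye", whereas the
# strict pattern in strict_format_reward_func is anchored with $ and rejects it.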
40 | responses = [completion[0]["content"] for completion in completions] 41 | matches = [re.match(pattern, r, re.DOTALL) for r in responses] 42 | return [0.5 if match else 0.0 for match in matches] 43 | 44 | 45 | def count_xml(text) -> float: 46 | count = 0.0 47 | if text.count("<think>\n") == 1: 48 | count += 0.125 49 | if text.count("\n</think>\n") == 1: 50 | count += 0.125 51 | if text.count("\n<answer>\n") == 1: 52 | count += 0.125 53 | count -= len(text.split("\n</answer>\n")[-1]) * 0.001  # deduct part of the reward when the text does not end with </answer> 54 | if text.count("\n</answer>") == 1: 55 | count += 0.125 56 | count -= (len(text.split("\n</answer>")[-1]) - 1) * 0.001  # deduct part of the reward when the text does not end with </answer> 57 | return count 58 | 59 | 60 | def xmlcount_reward_func(completions, **kwargs) -> list[float]: 61 | """Give some reward even when the chain-of-thought is incomplete""" 62 | contents = [completion[0]["content"] for completion in completions] 63 | return [count_xml(c) for c in contents] 64 | 65 | 66 | REWARD_FUNCS = { 67 | 'correctness_reward_func': correctness_reward_func, 68 | 'int_reward_func': int_reward_func, 69 | 'strict_format_reward_func': strict_format_reward_func, 70 | 'soft_format_reward_func': soft_format_reward_func, 71 | 'xmlcount_reward_func': xmlcount_reward_func 72 | } 73 | -------------------------------------------------------------------------------- /llms/train/deepseek-train/sft_train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from modelscope import AutoTokenizer, AutoModelForCausalLM 3 | from trl import SFTConfig, SFTTrainer 4 | 5 | from utils import get_gsm8k_dataset 6 | 7 | 8 | def train(args): 9 | training_args = SFTConfig( 10 | output_dir=args.checkpoint_dir, 11 | learning_rate=args.learning_rate, 12 | adam_beta1=args.adam_beta1, 13 | adam_beta2=args.adam_beta2, 14 | weight_decay=args.weight_decay, 15 | warmup_ratio=args.warmup_ratio, 16 | lr_scheduler_type=args.lr_scheduler_type, 17 | logging_steps=args.logging_steps, 18 | bf16=args.bf16, 19 | per_device_train_batch_size=args.per_device_train_batch_size, 20 | gradient_accumulation_steps=args.gradient_accumulation_steps, 21 | max_seq_length=args.max_seq_length, 22 | num_train_epochs=args.epochs, 23 | save_steps=args.save_steps, 24 | save_strategy=args.save_strategy, 25 | max_grad_norm=args.max_grad_norm, 26 | log_on_each_node=False, 27 | report_to="none" 28 | ) 29 | 30 | model = AutoModelForCausalLM.from_pretrained( 31 | args.model_name_or_path, 32 | torch_dtype=torch.bfloat16 if args.bf16 else None, 33 | device_map=None, 34 | cache_dir=args.cache_dir 35 | ).to("cuda") 36 | 37 | tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path, cache_dir=args.cache_dir) 38 | tokenizer.pad_token = tokenizer.eos_token 39 | 40 | trainer = SFTTrainer( 41 | model=model, 42 | processing_class=tokenizer, 43 | args=training_args, 44 | train_dataset=get_gsm8k_dataset(sft=True, cache_dir=args.cache_dir, 45 | first_half=args.split_half=="first_half", 46 | second_half=args.split_half=="second_half"), 47 | ) 48 | trainer.train() 49 | -------------------------------------------------------------------------------- /llms/train/deepseek-train/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from datasets import IterableDataset 4 | from modelscope.msdatasets import MsDataset 5 | 6 | SYSTEM_PROMPT = """ 7 | Respond in the following format: 8 | <think> 9 | ... 10 | </think> 11 | <answer> 12 | ...
13 | </answer> 14 | """ 15 | 16 | XML_COT_FORMAT = """ 17 | <think> 18 | {think} 19 | </think> 20 | <answer> 21 | {answer} 22 | </answer> 23 | """ 24 | 25 | 26 | def extract_answer(text: str) -> Optional[str]: 27 | if "####" not in text: 28 | return None 29 | return text.split("####")[1].strip() 30 | 31 | 32 | def extract_cot(text: str) -> str: 33 | if "####" not in text: 34 | return "" 35 | cot = text.split("####") 36 | return XML_COT_FORMAT.format(think=cot[0].strip(), answer=cot[1].strip()) 37 | 38 | 39 | def get_gsm8k_dataset(split="train", sft=False, cache_dir=None, first_half=False, second_half=False) -> IterableDataset: 40 | data = MsDataset.load('modelscope/gsm8k', subset_name='main', split=split, cache_dir=cache_dir) 41 | if first_half: 42 | data = data.shard(2, 0) 43 | elif second_half: 44 | data = data.shard(2, 1) 45 | 46 | if not sft: 47 | data = data.map(lambda x: { 48 | 'prompt': [ 49 | {'role': 'system', 'content': SYSTEM_PROMPT}, 50 | {'role': 'user', 'content': x['question']} 51 | ], 52 | 'answer': extract_answer(x['answer']) 53 | }) 54 | else: 55 | data = data.map(lambda x: { 56 | 'messages': [ 57 | {'role': 'system', 'content': SYSTEM_PROMPT}, 58 | {'role': 'user', 'content': x['question']}, 59 | {'role': 'assistant', 'content': extract_cot(x['answer'])}, 60 | ] 61 | }) 62 | return data 63 | -------------------------------------------------------------------------------- /multimodal/stable_diffusion/README.md: -------------------------------------------------------------------------------- 1 | **Notebooks for building stable-diffusion-webui for free on Google Colab or Kaggle GPU compute and doing AI painting** 2 | 3 | Theory write-ups: 4 | 5 | 1. **Stable Diffusion for AI painting: Autoencoder-Latent**: https://zhuanlan.zhihu.com/p/645939505 6 | 2. **Stable Diffusion for AI painting: the DDPM diffusion model**: https://zhuanlan.zhihu.com/p/645939505 7 | 8 | A big advantage of stable diffusion is that, built on all kinds of models, we can do AI painting in different styles. 9 | 10 | - The base stable diffusion models can be downloaded from [hugging face](https://huggingface.co/stabilityai) 11 | 12 | - Models of various styles can be downloaded from [Civitai](https://civitai.com/) 13 | - stable diffusion: [Git](https://github.com/CompVis/latent-diffusion)|[Paper](https://arxiv.org/abs/2112.10752) 14 | - stable-diffusion-webui: [Git](https://github.com/AUTOMATIC1111/stable-diffusion-webui) 15 | 16 | ![Transformers style](变形金刚风格.png) -------------------------------------------------------------------------------- /multimodal/stable_diffusion/变形金刚风格.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QunBB/DeepLearning/48ced0566108e0442422c817fcb7f51f4ca6548d/multimodal/stable_diffusion/变形金刚风格.png -------------------------------------------------------------------------------- /multitasklearning/README.md: -------------------------------------------------------------------------------- 1 | 1. **Introduction to the shared_bottom, MMoE and PLE models:** [Zhihu column](https://zhuanlan.zhihu.com/p/425209494) 2 | 3 | 2. **Multi-objective optimization: Uncertainty Weight, GradNorm, Dynamic Weight Average, Pareto-Efficient**: [Zhihu column](https://zhuanlan.zhihu.com/p/456089764) 4 | 5 | 3. **STEM: dimensional collapse & interest entanglement in recommendation models**: [Zhihu column](https://zhuanlan.zhihu.com/p/19885938029) -------------------------------------------------------------------------------- /multitasklearning/loss/dwa.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | T = 20 4 | 5 | 6 | def dynamic_weight_average(loss_t_1, loss_t_2): 7 | """ 8 | 9 | :param loss_t_1: list of each task's loss from the previous round (t-1), as scalars 10 | :param loss_t_2: list of each task's loss from the round before that (t-2) 11 | :return: 12 | """ 13 | # In rounds 1 and 2, w is initialized to 1, so lambda is 1 accordingly 14 | if not loss_t_1 or not loss_t_2: 15 | return 1 16 | 17 | assert len(loss_t_1) == len(loss_t_2)
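# DWA in one formula: w_i = L_i(t-1) / L_i(t-2) and
#   lambda_i = N * exp(w_i / T) / sum_k exp(w_k / T).
# Worked example (illustrative) with T=20 and two tasks:
#   loss_t_1=[0.5, 1.0], loss_t_2=[1.0, 1.0] gives w=[0.5, 1.0]; task 1 has
#   descended faster, so it receives a weight just below 1 and task 2 just
#   above 1 (the weights always sum to N=2).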
-------------------------------------------------------------------------------- /multimodal/stable_diffusion/README.md: -------------------------------------------------------------------------------- 1 | **This folder provides notebooks for setting up stable-diffusion-webui for free on Google Colab or Kaggle GPUs and doing AI painting** 2 | 3 | How it works: 4 | 5 | 1. **Stable Diffusion for AI painting: Autoencoder-Latent**: https://zhuanlan.zhihu.com/p/645939505 6 | 2. **Stable Diffusion for AI painting: the DDPM diffusion model**: https://zhuanlan.zhihu.com/p/645939505 7 | 8 | A major strength of stable diffusion is that, by swapping in different models, we can paint in all kinds of styles. 9 | 10 | - The stable diffusion base models can be downloaded from [hugging face](https://huggingface.co/stabilityai) 11 | 12 | - Models for various styles can be downloaded from [Civitai](https://civitai.com/) 13 | - stable diffusion: [Git](https://github.com/CompVis/latent-diffusion)|[paper](https://arxiv.org/abs/2112.10752) 14 | - stable-diffusion-webui: [Git](https://github.com/AUTOMATIC1111/stable-diffusion-webui) 15 | 16 | ![Transformers style](变形金刚风格.png) -------------------------------------------------------------------------------- /multimodal/stable_diffusion/变形金刚风格.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QunBB/DeepLearning/48ced0566108e0442422c817fcb7f51f4ca6548d/multimodal/stable_diffusion/变形金刚风格.png -------------------------------------------------------------------------------- /multitasklearning/README.md: -------------------------------------------------------------------------------- 1 | 1. **Introduction to the shared_bottom, MMoE and PLE models:** [article](https://zhuanlan.zhihu.com/p/425209494) 2 | 3 | 2. **Multi-objective optimization - Uncertainty Weight, GradNorm, Dynamic Weight Average, Pareto-Efficient**: [article](https://zhuanlan.zhihu.com/p/456089764) 4 | 5 | 3. **STEM: dimensional collapse & interest entanglement in recommendation models**: [article](https://zhuanlan.zhihu.com/p/19885938029) -------------------------------------------------------------------------------- /multitasklearning/loss/dwa.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | T = 20 4 | 5 | 6 | def dynamic_weight_average(loss_t_1, loss_t_2): 7 | """ 8 | 9 | :param loss_t_1: list of each task's loss from the previous round, as scalars 10 | :param loss_t_2: list of each task's loss from two rounds ago, as scalars 11 | :return: 12 | """ 13 | # In rounds 1 and 2, w is initialized to 1, so lambda is 1 as well 14 | if not loss_t_1 or not loss_t_2: 15 | return 1 16 | 17 | assert len(loss_t_1) == len(loss_t_2) 18 | task_n = len(loss_t_1) 19 | 20 | w = [l_1 / l_2 for l_1, l_2 in zip(loss_t_1, loss_t_2)] 21 | 22 | lamb = [math.exp(v / T) for v in w] 23 | 24 | lamb_sum = sum(lamb) 25 | 26 | return [task_n * l / lamb_sum for l in lamb] 27 | 
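A small sketch of how `dynamic_weight_average` would sit inside a training loop; the per-task losses below are synthetic stand-ins, and the import assumes this file is on the path.

```python
# Synthetic demo of DWA across epochs: the weights fall back to 1 for the
# first two rounds, then follow exp((L(t-1)/L(t-2)) / T), normalized so they
# sum to the number of tasks.
from dwa import dynamic_weight_average

loss_t_1, loss_t_2 = [], []  # losses from epoch t-1 and t-2

for epoch in range(4):
    lambdas = dynamic_weight_average(loss_t_1, loss_t_2)
    print(f"epoch {epoch}: lambda = {lambdas}")
    epoch_losses = [0.9 / (epoch + 1), 0.5 / (epoch + 1)]  # pretend per-task losses
    loss_t_2, loss_t_1 = loss_t_1, epoch_losses
```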
-------------------------------------------------------------------------------- /multitasklearning/loss/grad_norm.py: -------------------------------------------------------------------------------- 1 | import math 2 | import tensorflow as tf 3 | 4 | from typing import List 5 | 6 | 7 | def l2_normalize(x): 8 | return tf.sqrt(tf.reduce_sum(tf.pow(x, 2))) 9 | 10 | 11 | def grad_norm(loss_tensor_list: List[tf.Variable], 12 | last_shared_weight: tf.Variable, 13 | optimizer: tf.train.Optimizer, 14 | loss_0: List, 15 | alpha: float = 0.12): 16 | """ 17 | 18 | :param loss_tensor_list: list of all tasks' losses, as tensors 19 | :param last_shared_weight: parameters of the last shared layer, as a tensor 20 | :param optimizer: the optimizer, a tf.train.Optimizer 21 | :param loss_0: list of all tasks' initial losses 22 | :param alpha: the GradNorm hyperparameter 23 | :return: 24 | """ 25 | # number of tasks in the multi-task setup 26 | task_n = len(loss_tensor_list) 27 | 28 | # loss weight of each task 29 | w = [tf.get_variable("loss_weight_" + str(i), initializer=1.) for i in range(task_n)] 30 | 31 | # L2 norm of each task's gradient w.r.t. the last shared layer 32 | gradient_norm = [ 33 | l2_normalize(tf.gradients(w[i] * loss_tensor_list[i], last_shared_weight)[0]) for i in range(task_n) 34 | ] 35 | 36 | gradient_norm_avg = tf.reduce_mean(gradient_norm) 37 | 38 | # loss ratio of each task 39 | loss_ratio = [loss_tensor_list[i] / loss_0[i] for i in range(task_n)] 40 | 41 | loss_ratio_avg = tf.reduce_mean(loss_ratio) 42 | 43 | # relative training rate of each task 44 | train_rate = [l / loss_ratio_avg for l in loss_ratio] 45 | 46 | # GradNorm loss on the gradient norms 47 | loss_grad = [tf.abs(gradient_norm[i] - gradient_norm_avg * tf.pow(train_rate[i], alpha)) for i in range(task_n)] 48 | loss_grad = tf.reduce_sum(loss_grad) 49 | # only back-propagate loss_grad to the loss weights w_i 50 | grad_op = optimizer.minimize(loss_grad, var_list=w) 51 | 52 | # total loss 53 | total_loss = tf.reduce_sum([w[i] * loss_tensor_list[i] for i in range(task_n)]) 54 | # the loss weights w_i must not be updated by the total loss, so remove them from the trainable variables 55 | trainable_weights = tf.trainable_variables() 56 | for v in w: 57 | trainable_weights.remove(v) 58 | train_op = optimizer.minimize(total_loss, var_list=trainable_weights)  # the w_i were already removed above 59 | 60 | return total_loss, train_op, loss_grad, grad_op 61 | 62 | # if __name__ == '__main__': 63 | # last_shared_weight = tf.get_variable("last_shared_weight", shape=[100, 200]) 64 | # loss_tensor_list = [tf.reduce_sum(last_shared_weight * 0.01) for i in range(3)] 65 | # optimizer = tf.train.AdamOptimizer() 66 | # loss_0 = [math.log(3) for _ in range(3)] 67 | # grad_norm(loss_tensor_list, 68 | # last_shared_weight, 69 | # optimizer, 70 | # loss_0) 71 | # print("") 72 | 
-------------------------------------------------------------------------------- /multitasklearning/loss/pareto_efficient.py: -------------------------------------------------------------------------------- 1 | """ 2 | Reference: https://github.com/weberrr/PE-LTR 3 | """ 4 | import os 5 | 6 | os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' 7 | 8 | import tensorflow as tf 9 | import numpy as np 10 | from scipy.optimize import minimize 11 | from scipy.optimize import nnls 12 | import time 13 | 14 | 15 | seed = 3456 16 | tf.set_random_seed(seed) 17 | np.random.seed(seed) 18 | 19 | batch_size = 2000 20 | dim = 64 21 | 22 | x = np.float32(np.random.rand(batch_size, dim)) 23 | 24 | # regression task 25 | y = np.dot(x, np.random.rand(dim, 1)) + 0.3 26 | 27 | # binary classification task 28 | y2 = np.random.randint(0, 2, [batch_size, 1]) 29 | 30 | # loss weights of the different tasks 31 | weight_a = tf.placeholder(tf.float32, shape=[]) 32 | weight_b = tf.placeholder(tf.float32, shape=[]) 33 | 34 | # shared parameters; following GradNorm, only the last shared layer's parameters are used 35 | with tf.variable_scope("shared_weight"): 36 | hidden = tf.layers.dense(x, dim // 2, 37 | activation=tf.nn.relu, 38 | kernel_initializer=tf.truncated_normal_initializer(stddev=0.1)) 39 | 40 | with tf.variable_scope("task_a"): 41 | y_pre = tf.layers.dense(hidden, 1, 42 | kernel_initializer=tf.truncated_normal_initializer(stddev=0.1)) 43 | y_pre = tf.squeeze(y_pre, axis=-1) 44 | loss_a = tf.reduce_mean(tf.square(np.squeeze(y, axis=-1) - y_pre))  # squeeze y so the shapes align instead of broadcasting 45 | 46 | with tf.variable_scope("task_b"): 47 | y2_pre = tf.layers.dense(hidden, 1, 48 | activation=tf.nn.sigmoid, 49 | kernel_initializer=tf.truncated_normal_initializer(stddev=0.1)) 50 | 51 | # y2_pre = tf.squeeze(y2_pre, axis=-1) 52 | loss_b = y2 * tf.log(y2_pre) + (1 - y2) * tf.log(1 - y2_pre)  # binary cross-entropy 53 | loss_b = tf.negative(loss_b) 54 | loss_b = tf.reduce_mean(loss_b) 55 | 56 | loss = weight_a * loss_a + weight_b * loss_b 57 | 58 | optimizer = tf.train.GradientDescentOptimizer(0.1) 59 | 60 | with tf.variable_scope("pareto"): 61 | a_gradients = [] 62 | b_gradients = [] 63 | for w in tf.trainable_variables(scope="shared_weight"): 64 | a_gradients.append(tf.reshape(tf.gradients(loss_a, w), [-1, 1])) 65 | b_gradients.append(tf.reshape(tf.gradients(loss_b, w), [-1, 1])) 66 | 67 | a_gradients = tf.concat(a_gradients, axis=0) 68 | b_gradients = tf.concat(b_gradients, axis=0) 69 | 70 | train = optimizer.minimize(loss) 71 | 72 | sess = tf.Session() 73 | sess.run(tf.global_variables_initializer()) 74 | 75 | 76 | def pareto_step(w, c, G): 77 | """ 78 | ref:http://ofey.me/papers/Pareto.pdf 79 | K : the number of task 80 | M : the dim of NN's params 81 | :param w: # (K,1) 82 | :param c: # (K,1) 83 | :param G: # (K,M) 84 | :return: 85 | """ 86 | GGT = np.matmul(G, np.transpose(G)) # (K, K) 87 | e = np.mat(np.ones(np.shape(w))) # (K, 1) 88 | m_up = np.hstack((GGT, e)) # (K, K+1) 89 | m_down = np.hstack((np.transpose(e), np.mat(np.zeros((1, 1))))) # (1, K+1) 90 | M = np.vstack((m_up, m_down)) # (K+1, K+1) 91 | z = np.vstack((-np.matmul(GGT, c), 1 - np.sum(c))) # (K+1, 1) 92 | hat_w = np.matmul(np.matmul(np.linalg.inv(np.matmul(np.transpose(M), M)), M), z) # (K+1, 1) 93 | hat_w = hat_w[:-1] # (K, 1) 94 | hat_w = np.reshape(np.array(hat_w), (hat_w.shape[0],)) # (K,) 95 | c = np.reshape(np.array(c), (c.shape[0],)) # (K,) 96 | new_w = ASM(hat_w, c) 97 | return new_w 98 | 99 | 100 | def ASM(hat_w, c): 101 | """ 102 | ref: 103 | http://ofey.me/papers/Pareto.pdf, 104 | https://stackoverflow.com/questions/33385898/how-to-include-constraint-to-scipy-nnls-function-solution-so-that-it-sums-to-1 105 | :param hat_w: # (K,) 106 | :param c: # (K,) 107 | :return: 108 | """ 109 | A = np.array([[0 if i != j else 1 for i in range(len(c))] for j in range(len(c))]) 110 | b = hat_w 111 | x0, _ = nnls(A, b) 112 | 113 | def _fn(x, A, b): 114 | return np.linalg.norm(A.dot(x) - b) 115 | 116 | cons = {'type': 'eq', 'fun': lambda x: np.sum(x) + np.sum(c) - 1} 117 | bounds = [[0., None] for _ in range(len(hat_w))] 118 | min_out = minimize(_fn, x0, args=(A, b), method='SLSQP', bounds=bounds, constraints=cons) 119 | new_w = min_out.x + c 120 | return new_w 121 | 122 | 123 | use_pareto = True 124 | w_a, w_b = 0.5, 0.5 125 | c_a, c_b = 0.4, 0.2 126 | for step in range(0, 100): 127 | res = sess.run([a_gradients, b_gradients, train, loss, loss_a, loss_b], 128 | feed_dict={weight_a: w_a, weight_b: w_b}) 129 | 130 | if use_pareto: 131 | s = time.time() 132 | weights = np.mat([[w_a], [w_b]]) 133 | paras = np.hstack((res[0], res[1])) 134 | paras = np.transpose(paras) 135 | w_a, w_b = pareto_step(weights, np.mat([[c_a], [c_b]]), paras) 136 | print("pareto cost: {}".format(time.time() - s)) 137 | 138 | l, l_a, l_b = res[3:] 139 | print("step:{:0>2d} w_a:{:4f} w_b:{:4f} loss:{:4f} loss_a:{:4f} loss_b:{:4f} r:{:4f}".format( 140 | step, w_a, w_b, l, l_a, l_b, l_a / l_b)) 141 | -------------------------------------------------------------------------------- /multitasklearning/loss/uncertainty_weight.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def uncertainty_to_weigh_losses(loss_list): 5 | """ 6 | Weigh all task losses with learned (homoscedastic) uncertainty 7 | :param loss_list: list of each task's loss, as tensors 8 | :return: the combined loss, as a tensor 9 | """ 10 | loss_n = len(loss_list) 11 | # Initialize `log(uncertainty_weight)` variables instead of `uncertainty_weight` itself, which avoids NaNs when `log(uncertainty_weight)` is computed later 12 | # The variable actually stores `log(uncertainty_weight) * 2`, which stands in for the later squaring 13 | uncertainty_weight_log = [ 14 | tf.get_variable("uncertainty_weight_log_" + str(i), shape=(), initializer=tf.zeros_initializer() 15 | ) for i in range(loss_n) 16 | ] 17 | 18 | final_loss = [] 19 | for i in range(loss_n): 20 | # apply the log-substituted identity: L_i / (2 * sigma_i^2) + log(sigma_i) 21 | final_loss.append(tf.div(loss_list[i], 2 * tf.exp(uncertainty_weight_log[i])) + uncertainty_weight_log[i] / 2) 22 | 23 | return tf.add_n(final_loss) 24 | 
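A minimal TF1-style usage sketch for the function above, assuming the two task losses already exist as tensors; the loss values are toy constants.

```python
# Minimal sketch: weigh two toy task losses with homoscedastic uncertainty.
# Both log-variance variables start at 0, i.e. an initial weight of 1/2 each.
import tensorflow as tf

from uncertainty_weight import uncertainty_to_weigh_losses

if __name__ == '__main__':
    loss_ctr = tf.constant(1.2)
    loss_cvr = tf.constant(0.4)
    total_loss = uncertainty_to_weigh_losses([loss_ctr, loss_cvr])

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print(sess.run(total_loss))  # (1.2 + 0.4) / 2 at initialization
```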
-------------------------------------------------------------------------------- /multitasklearning/shared_bottom.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | import tensorflow.contrib.slim as slim 4 | 5 | 6 | class BaseMTL: 7 | 8 | def __init__(self, target_dict: dict, 9 | sharing_layer_size: list, 10 | expert_layer_size: list, 11 | l2_reg: float, 12 | dropout: float): 13 | """ 14 | 15 | :param target_dict: number of classes for each target, e.g. {"click": 2, "like": 2} 16 | :param sharing_layer_size: layer sizes of the shared layers, e.g. [512] 17 | :param expert_layer_size: layer sizes of the task-specific (expert) layers, e.g. [256, 128] 18 | :param l2_reg: L2 regularization strength 19 | :param dropout: 20 | """ 21 | self.target_dict = target_dict 22 | self.l2_reg = l2_reg 23 | self.dropout = dropout 24 | self.sharing_layer_size = sharing_layer_size 25 | self.expert_layer_size = expert_layer_size 26 | 27 | def __call__(self, inputs, is_training): 28 | """ 29 | MTL network 30 | :param inputs: features after the embedding layer (concatenated with dense features) 31 | :param is_training: whether we are currently in the training phase 32 | :return: 33 | """ 34 | with tf.variable_scope("share-bottom"): 35 | sharing_layer = self._mlp_layer(inputs, self.sharing_layer_size, is_training=is_training, 36 | l2_reg=self.l2_reg, dropout=self.dropout, use_bn=True) 37 | 38 | with tf.variable_scope("expert_layer"): 39 | expert_layer = {} 40 | for name in self.target_dict.keys(): 41 | expert_layer[name] = self._mlp_layer(sharing_layer, self.expert_layer_size, is_training=is_training, 42 | l2_reg=self.l2_reg, dropout=self.dropout, use_bn=True) 43 | 44 | with tf.variable_scope("prediction"): 45 | pred = {} 46 | logits = {} 47 | for name in self.target_dict.keys(): 48 | output = tf.layers.dense(expert_layer[name], self.target_dict[name]) 49 | logits[name] = tf.nn.softmax(output) 50 | 51 | pred[name] = tf.argmax(logits[name], axis=-1) 52 | 53 | return logits, pred 54 | 55 | def _mlp_layer(self, inputs, sizes, is_training, 56 | l2_reg=0., dropout=0., use_bn=False, activation=tf.nn.relu): 57 | """ 58 | standard MLP layers 59 | :param inputs: 60 | :param sizes: hidden sizes of the dense layers, e.g. [256, 128] 61 | :param is_training: whether we are currently in the training phase 62 | :param l2_reg: L2 regularization strength 63 | :param dropout: 64 | :param use_bn: whether to use batch_normalization 65 | :param activation: activation function 66 | :return: 67 | """ 68 | output = inputs 69 | 70 | for units in sizes: 71 | output = tf.layers.dense(output, units=units, 72 | kernel_initializer=slim.variance_scaling_initializer(), 73 | kernel_regularizer=tf.contrib.layers.l2_regularizer(l2_reg) 74 | ) 75 | 76 | if use_bn: 77 | output = tf.layers.batch_normalization(output, training=is_training) 78 | 79 | if activation is not None: 80 | output = activation(output) 81 | 82 | if is_training: 83 | output = tf.nn.dropout(output, 1 - dropout) 84 | 85 | return output 86 | -------------------------------------------------------------------------------- /multitasklearning/torch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QunBB/DeepLearning/48ced0566108e0442422c817fcb7f51f4ca6548d/multitasklearning/torch/__init__.py -------------------------------------------------------------------------------- /multitasklearning/torch/mmoe.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from collections import OrderedDict 5 | from typing import List, Dict, Tuple, Union, Optional 6 | 7 | from utils import DNN 8 | 9 | 10 | class MMoe(nn.Module): 11 | """One Level MMoe. 12 | 13 | :param inputs_dim: Dimension of the inputs. 14 | :param labels_dict: dict. The number of labels for each task, e.g. {"click": 2, "like": 2} 15 | :param num_experts: int. The number of shared experts 16 | :param expert_hidden_units: list of positive integer, the layer number and units in each expert layer. 17 | :param tower_hidden_units: list of positive integer, the layer number and units in each tower layer. 18 | :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN weights 19 | :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate. 20 | :param dnn_activation: Activation function to use in DNN 21 | :param dnn_use_bn: bool. Whether to use BatchNormalization before activation or not in DNN 22 | :return: A PyTorch model instance. 
23 | 24 | """ 25 | 26 | def __init__(self, 27 | inputs_dim: int, 28 | labels_dict: Dict[str, int], 29 | num_experts: int, 30 | expert_hidden_units: Union[List[int], Tuple[int]], 31 | tower_hidden_units: Union[List[int], Tuple[int]] = (256, 128), 32 | l2_reg_dnn: float = 0., 33 | dnn_dropout: float = 0., 34 | dnn_activation: Optional[str] = 'relu', 35 | dnn_use_bn: bool = False, 36 | device: str = 'cpu'): 37 | super(MMoe, self).__init__() 38 | 39 | self.labels_dict = labels_dict 40 | 41 | self.experts_dnn = nn.ModuleList([DNN(inputs_dim, expert_hidden_units, 42 | activation=dnn_activation, dropout_rate=dnn_dropout, use_bn=dnn_use_bn, 43 | ) for _ in range(num_experts)]) 44 | self.gate_dnn = nn.ModuleList([DNN(inputs_dim, [num_experts], activation=None, use_bn=dnn_use_bn, 45 | ) for _ in labels_dict]) 46 | 47 | self.task_tower = nn.ModuleList([DNN(expert_hidden_units[-1], tower_hidden_units, activation=dnn_activation, 48 | dropout_rate=dnn_dropout, use_bn=dnn_use_bn, 49 | ) for _ in labels_dict]) 50 | self.task_dense = nn.ModuleList( 51 | [DNN(tower_hidden_units[-1], [labels_dict[name]], activation=None, bias=False) for name in labels_dict]) 52 | 53 | self.l2_reg_dnn = l2_reg_dnn 54 | self.device = device 55 | self.to(device) 56 | 57 | @property 58 | def l2_reg_loss(self): 59 | """L2 Regularization Loss""" 60 | reg_loss = torch.zeros((1,), device=self.device) 61 | if self.l2_reg_dnn and self.l2_reg_dnn > 0.: 62 | for name, parameter in self.named_parameters(): 63 | if 'weight' in name: 64 | reg_loss += torch.sum(self.l2_reg_dnn * torch.square(parameter)) 65 | return reg_loss 66 | 67 | def forward(self, dnn_inputs: torch.Tensor) -> Dict[str, torch.Tensor]: 68 | outputs = OrderedDict() 69 | 70 | experts_output = [] 71 | for dnn in self.experts_dnn: 72 | experts_output.append(dnn(dnn_inputs)) 73 | 74 | for index, name in enumerate(self.labels_dict): 75 | gate = self.gate_dnn[index](dnn_inputs) 76 | tower_inputs = _merge_experts_with_gate(experts_output, gate) 77 | tower_output = self.task_tower[index](tower_inputs) 78 | task_output = self.task_dense[index](tower_output) 79 | outputs[name] = torch.softmax(task_output, dim=-1) 80 | 81 | return outputs 82 | 83 | 84 | def _merge_experts_with_gate(experts: List[torch.Tensor], 85 | gate: torch.Tensor): 86 | experts = torch.stack(experts, dim=1) 87 | 88 | gate_weight = torch.softmax(gate, dim=-1) 89 | gate_weight = torch.unsqueeze(gate_weight, dim=2) 90 | 91 | return torch.sum(experts * gate_weight, dim=1) 92 | 93 | 94 | if __name__ == '__main__': 95 | import numpy as np 96 | 97 | model = MMoe(inputs_dim=8, 98 | labels_dict={"click": 2, "like": 2}, 99 | num_experts=2, 100 | expert_hidden_units=[256]) 101 | 102 | outputs = model(torch.FloatTensor(np.random.random([4, 8]))) 103 | 104 | print(outputs) 105 | for name in outputs: 106 | print(name, outputs[name].shape) 107 | -------------------------------------------------------------------------------- /nlp/masked_language_model/data/bert/bert_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_probs_dropout_prob": 0.1, 3 | "directionality": "bidi", 4 | "hidden_act": "gelu", 5 | "hidden_dropout_prob": 0.1, 6 | "hidden_size": 768, 7 | "initializer_range": 0.02, 8 | "intermediate_size": 3072, 9 | "max_position_embeddings": 512, 10 | "num_attention_heads": 12, 11 | "num_hidden_layers": 12, 12 | "pooler_fc_size": 768, 13 | "pooler_num_attention_heads": 12, 14 | "pooler_num_fc_layers": 3, 15 | "pooler_size_per_head": 128, 16 | "pooler_type": 
"first_token_transform", 17 | "type_vocab_size": 2, 18 | "vocab_size": 21128 19 | } 20 | -------------------------------------------------------------------------------- /nlp/masked_language_model/data/example_no_sop.txt: -------------------------------------------------------------------------------- 1 | 这个系列将记录下本人平时在深度学习方面觉得实用的一些trick,可能会包括性能提升和工程优化等方面。 2 | 该系列的代码会更新到Github 3 | 这张图就很清晰地阐述整个推荐系统的流程: 4 | 从百万量级的视频库,根据用户的特征及上下文信息,通过召回模型,筛选出用户可能感兴趣的少量视频,称为候选集(百量级); 5 | 接着,再通过排序模型,将上一步的候选集,进行排序,最终呈现给用户。 6 | 数据类别不均衡是很多场景任务下会遇到的一种问题。比如NLP中的命名实体识别NER,文本中许多都是某一种或者几种类型的实体,比如无需识别的不重要实体;又或者常见的分类任务,大部分数据的标签都是某几类。 7 | 而我们又无法直接排除这些很少的类别的数据,因为这些类别也很重要,仍然需要模型去预测这些类别。 8 | 有时会从数据层面缓解这种类别不均衡带来的影响,主要是过采样和欠采样。 9 | 过采样:对于某些类别数据比较少,对它们进行重复采样,以达到相对平衡,重复采样的时候,有时也会对数据加上一点噪声; 10 | 欠采样:对于某些类别数据特别多,只使用部分数据,抛弃一些数据; 11 | 过采样可能导致这些类别产生过拟合的现象,而欠采样则容易导致模型的泛化性变差。 12 | 另外,比较常用的则是结合ensemble方法,则将数据切分为N部分,每部分都包含数据少的类别的所有样本和数据多的类别的部分样本,训练N个模型,最后进行集成。 13 | 缺点是,使用ensemble则会提高部署成本和带来性能问题。 14 | 版权声明:本文为CSDN博主「我就算饿死也不做程序员」的原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接及本声明。 15 | 原文链接:https://blog.csdn.net/sgyuanshi/article/details/127796507 16 | 上面也提到:召回网络的目标即可以理解为预测对于一个用户,在当前的上下文场景下,观看每个视频的概率。 17 | 那么,对于每个样本来说,所有视频都可能是正样本。假如有100W个视频,那么召回模型就变成一个100W分类模型了,这显然带来了很大的训练难度。 18 | 所以,就需要用到负采样了,论文这里讲得比较模糊,大概思路就是: 19 | ID特征经过embedding层和其他dense特征拼接,作为输入层; 20 | 接着,是共享网络层share-bottom,一般称为硬参数共享(hard parameter sharing)。这是通过底层的共享网络来实现不同task信息之间的共享; 21 | 然后,是专家网络层,即每个task有自己独立的网络层; 22 | 最后,每个task的专家网络层输出对应的预测值。 -------------------------------------------------------------------------------- /nlp/masked_language_model/data/example_sop.txt: -------------------------------------------------------------------------------- 1 | 这个系列将记录下本人平时在深度学习方面觉得实用的一些trick, 2 | 可能会包括性能提升和工程优化等方面。 3 | 4 | 该系列的代码会更新到Github 5 | 6 | 这张图就很清晰地阐述整个推荐系统的流程: 7 | 从百万量级的视频库,根据用户的特征及上下文信息,通过召回模型,筛选出用户可能感兴趣的少量视频,称为候选集(百量级); 8 | 接着,再通过排序模型,将上一步的候选集,进行排序,最终呈现给用户。 9 | 10 | 数据类别不均衡是很多场景任务下会遇到的一种问题。比如NLP中的命名实体识别NER,文本中许多都是某一种或者几种类型的实体,比如无需识别的不重要实体;又或者常见的分类任务,大部分数据的标签都是某几类。 11 | 而我们又无法直接排除这些很少的类别的数据,因为这些类别也很重要,仍然需要模型去预测这些类别。 12 | 13 | 有时会从数据层面缓解这种类别不均衡带来的影响,主要是过采样和欠采样。 14 | 过采样:对于某些类别数据比较少,对它们进行重复采样,以达到相对平衡,重复采样的时候,有时也会对数据加上一点噪声; 15 | 欠采样:对于某些类别数据特别多,只使用部分数据,抛弃一些数据; 16 | 过采样可能导致这些类别产生过拟合的现象,而欠采样则容易导致模型的泛化性变差。 17 | 18 | 另外,比较常用的则是结合ensemble方法,则将数据切分为N部分,每部分都包含数据少的类别的所有样本和数据多的类别的部分样本,训练N个模型,最后进行集成。 19 | 缺点是,使用ensemble则会提高部署成本和带来性能问题。 20 | 21 | 版权声明:本文为CSDN博主「我就算饿死也不做程序员」的原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接及本声明。 22 | 原文链接:https://blog.csdn.net/sgyuanshi/article/details/127796507 23 | 24 | 上面也提到:召回网络的目标即可以理解为预测对于一个用户,在当前的上下文场景下,观看每个视频的概率。 25 | 那么,对于每个样本来说,所有视频都可能是正样本。假如有100W个视频,那么召回模型就变成一个100W分类模型了,这显然带来了很大的训练难度。 26 | 所以,就需要用到负采样了,论文这里讲得比较模糊,大概思路就是: 27 | 28 | ID特征经过embedding层和其他dense特征拼接,作为输入层; 29 | 接着,是共享网络层share-bottom,一般称为硬参数共享(hard parameter sharing)。这是通过底层的共享网络来实现不同task信息之间的共享; 30 | 然后,是专家网络层,即每个task有自己独立的网络层; 31 | 最后,每个task的专家网络层输出对应的预测值。 -------------------------------------------------------------------------------- /nlp/masked_language_model/data/stopwords.txt: -------------------------------------------------------------------------------- 1 | 了 2 | 的 3 | 啊 4 | -------------------------------------------------------------------------------- /nlp/masked_language_model/pt/requirements.txt: -------------------------------------------------------------------------------- 1 | crc32c 2 | jieba 3 | transformers 4 | torch==1.9 5 | huggingface-hub==0.4.0 6 | -------------------------------------------------------------------------------- /nlp/masked_language_model/pt/run_finetuning.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | import torch 4 | from transformers.models.bert.modeling_bert import BertModel, BertConfig 5 | 6 | 7 | def main(model_path): 8 | model = BertModel(config=BertConfig.from_pretrained(os.path.join(model_path, 'config.json'))) 9 | checkpoint = torch.load(os.path.join(model_path, 'pytorch_model.bin'), map_location='cpu') 10 | 11 | # 筛选出`BertModel`部分的权重,并提出权重名称的前缀`bert` 12 | bert_state_dict = OrderedDict() 13 | for key in checkpoint['model_state_dict']: 14 | if key.startswith('bert.'): 15 | bert_state_dict[key[5:]] = checkpoint['model_state_dict'][key] 16 | 17 | model.load_state_dict(bert_state_dict) 18 | print(model) 19 | 20 | 21 | class YourModel(torch.nn.Module): 22 | def __init__(self, model_path): 23 | super().__init__() 24 | 25 | self.bert = BertModel(config=BertConfig.from_pretrained(os.path.join(model_path, 'config.json'))) 26 | checkpoint = torch.load(os.path.join(model_path, 'pytorch_model.bin'), map_location='cpu') 27 | self.bert.load_state_dict(checkpoint) 28 | print(self.bert) 29 | 30 | 31 | if __name__ == '__main__': 32 | # just use the pretrained model like BertModel from transformer 33 | # saved by run_pretraining.py[line 186] 34 | model = BertModel.from_pretrained('../cache/bert-base-chinese-pretrained') 35 | 36 | # saved by run_pretraining.py[line 111] 37 | # main('../cache/bert-base-chinese-pretrained') 38 | # or 39 | # model = YourModel('../cache/bert-base-chinese-pretrained') 40 | -------------------------------------------------------------------------------- /nlp/masked_language_model/pt/tfrecord/__init__.py: -------------------------------------------------------------------------------- 1 | from tfrecord import tools 2 | from tfrecord import torch 3 | 4 | from tfrecord import example_pb2 5 | from tfrecord import iterator_utils 6 | from tfrecord import reader 7 | from tfrecord import writer 8 | 9 | from tfrecord.iterator_utils import * 10 | from tfrecord.reader import * 11 | from tfrecord.writer import * 12 | -------------------------------------------------------------------------------- /nlp/masked_language_model/pt/tfrecord/iterator_utils.py: -------------------------------------------------------------------------------- 1 | """Iterator utils.""" 2 | 3 | from __future__ import division 4 | 5 | import typing 6 | import warnings 7 | 8 | import numpy as np 9 | 10 | 11 | def cycle(iterator_fn: typing.Callable) -> typing.Iterable[typing.Any]: 12 | """Create a repeating iterator from an iterator generator.""" 13 | while True: 14 | for element in iterator_fn(): 15 | yield element 16 | 17 | 18 | def sample_iterators(iterators: typing.List[typing.Iterator], 19 | ratios: typing.List[int], 20 | infinite: bool = True) -> typing.Iterable[typing.Any]: 21 | """Retrieve info generated from the iterator(s) according to their 22 | sampling ratios. 23 | 24 | Params: 25 | ------- 26 | iterators: list of iterators 27 | All iterators (one for each file). 28 | 29 | ratios: list of int 30 | The ratios with which to sample each iterator. 31 | 32 | infinite: bool, optional, default=True 33 | Whether the returned iterator should be infinite or not 34 | 35 | Yields: 36 | ------- 37 | item: Any 38 | Decoded bytes of features into its respective data types from 39 | an iterator (based off their sampling ratio). 
40 | """ 41 | if infinite: 42 | iterators = [cycle(iterator) for iterator in iterators] 43 | else: 44 | iterators = [iterator() for iterator in iterators] 45 | ratios = np.array(ratios) 46 | ratios = ratios / ratios.sum() 47 | while iterators: 48 | choice = np.random.choice(len(ratios), p=ratios) 49 | try: 50 | yield next(iterators[choice]) 51 | except StopIteration: 52 | if iterators: 53 | del iterators[choice] 54 | ratios = np.delete(ratios, choice) 55 | ratios = ratios / ratios.sum() 56 | 57 | 58 | 59 | def shuffle_iterator(iterator: typing.Iterator, 60 | queue_size: int) -> typing.Iterable[typing.Any]: 61 | """Shuffle elements contained in an iterator. 62 | 63 | Params: 64 | ------- 65 | iterator: iterator 66 | The iterator. 67 | 68 | queue_size: int 69 | Length of buffer. Determines how many records are queued to 70 | sample from. 71 | 72 | Yields: 73 | ------- 74 | item: Any 75 | Decoded bytes of the features into its respective data type (for 76 | an individual record) from an iterator. 77 | """ 78 | buffer = [] 79 | try: 80 | for _ in range(queue_size): 81 | buffer.append(next(iterator)) 82 | except StopIteration: 83 | warnings.warn("Number of elements in the iterator is less than the " 84 | f"queue size (N={queue_size}).") 85 | while buffer: 86 | index = np.random.randint(len(buffer)) 87 | try: 88 | item = buffer[index] 89 | buffer[index] = next(iterator) 90 | yield item 91 | except StopIteration: 92 | yield buffer.pop(index) 93 | -------------------------------------------------------------------------------- /nlp/masked_language_model/pt/tfrecord/tools/__init__.py: -------------------------------------------------------------------------------- 1 | from tfrecord.tools import tfrecord2idx 2 | 3 | from tfrecord.tools.tfrecord2idx import create_index 4 | -------------------------------------------------------------------------------- /nlp/masked_language_model/pt/tfrecord/tools/tfrecord2idx.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import sys 4 | import struct 5 | 6 | 7 | def create_index(tfrecord_file: str, index_file: str) -> None: 8 | """Create index from the tfrecords file. 9 | 10 | Stores starting location (byte) and length (in bytes) of each 11 | serialized record. 12 | 13 | Params: 14 | ------- 15 | tfrecord_file: str 16 | Path to the TFRecord file. 17 | 18 | index_file: str 19 | Path where to store the index file. 
20 | """ 21 | infile = open(tfrecord_file, "rb") 22 | outfile = open(index_file, "w") 23 | 24 | while True: 25 | current = infile.tell() 26 | try: 27 | byte_len = infile.read(8) 28 | if len(byte_len) == 0: 29 | break 30 | infile.read(4) 31 | proto_len = struct.unpack("q", byte_len)[0] 32 | infile.read(proto_len) 33 | infile.read(4) 34 | outfile.write(str(current) + " " + str(infile.tell() - current) + "\n") 35 | except: 36 | print("Failed to parse TFRecord.") 37 | break 38 | infile.close() 39 | outfile.close() 40 | 41 | 42 | def main(): 43 | if len(sys.argv) < 3: 44 | print("Usage: tfrecord2idx ") 45 | sys.exit() 46 | 47 | create_index(sys.argv[1], sys.argv[2]) 48 | 49 | 50 | if __name__ == "__main__": 51 | main() 52 | -------------------------------------------------------------------------------- /nlp/masked_language_model/pt/tfrecord/torch/__init__.py: -------------------------------------------------------------------------------- 1 | from tfrecord.torch import dataset 2 | 3 | from tfrecord.torch.dataset import TFRecordDataset 4 | from tfrecord.torch.dataset import MultiTFRecordDataset 5 | -------------------------------------------------------------------------------- /nlp/masked_language_model/tf/requirements.txt: -------------------------------------------------------------------------------- 1 | jieba 2 | tensorflow-gpu==1.15.5 3 | -------------------------------------------------------------------------------- /nlp/sentence_bert/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QunBB/DeepLearning/48ced0566108e0442422c817fcb7f51f4ca6548d/nlp/sentence_bert/__init__.py -------------------------------------------------------------------------------- /nlp/sentence_bert/sbert/model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from sentence_bert.bert import BertConfig 4 | 5 | from sentence_bert.utils import get_sentence_emb 6 | 7 | 8 | class SBERT: 9 | 10 | def __init__(self, bert_config_file, objective, 11 | pooling_mode='mean', 12 | num_labels=None, 13 | margin=1.): 14 | """ 15 | 16 | :param bert_config_file: bert_config配置文件路径 17 | :param objective: 微调的结构 18 | :param pooling_mode: 句向量pooling的策略 19 | :param num_labels: 对于classification微调结构,需设置labels的数量 20 | :param margin: 对于triplet结构,需设置 21 | """ 22 | assert objective in ('classification', 'regression', 'triplet') 23 | assert pooling_mode in ('mean', 'max', 'cls') 24 | 25 | self.bert_config = BertConfig.from_json_file(bert_config_file) 26 | self.objective = objective 27 | self.num_labels = num_labels 28 | self.margin = margin 29 | self.pooling_mode = pooling_mode 30 | 31 | def __call__(self, inputs_a, inputs_b, is_training, labels=None, inputs_c=None): 32 | """ 33 | 34 | :param inputs_a: 句子a的输入,dict形式 35 | :keyword input_ids: int32 Tensor [batch_size, max_seq_len] 36 | :keyword seq_len: int32 Tensor [batch_size] 37 | :param inputs_b: 句子b的输入,同inputs_a。如为triplet结构,则为positive sentence 38 | :param is_training: 39 | :param labels: 40 | :param inputs_c: 同inputs_a。仅triplet结构时使用,为negative sentence 41 | :return: 42 | """ 43 | sentence_emb_a = get_sentence_emb(input_ids=inputs_a['input_ids'], 44 | seq_len=inputs_a['seq_len'], 45 | is_training=is_training, 46 | bert_config=self.bert_config, 47 | pooling_mode=self.pooling_mode) 48 | 49 | sentence_emb_b = get_sentence_emb(input_ids=inputs_b['input_ids'], 50 | seq_len=inputs_b['seq_len'], 51 | is_training=is_training, 52 | 
-------------------------------------------------------------------------------- /nlp/masked_language_model/pt/tfrecord/torch/__init__.py: -------------------------------------------------------------------------------- 1 | from tfrecord.torch import dataset 2 | 3 | from tfrecord.torch.dataset import TFRecordDataset 4 | from tfrecord.torch.dataset import MultiTFRecordDataset 5 | -------------------------------------------------------------------------------- /nlp/masked_language_model/tf/requirements.txt: -------------------------------------------------------------------------------- 1 | jieba 2 | tensorflow-gpu==1.15.5 3 | -------------------------------------------------------------------------------- /nlp/sentence_bert/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QunBB/DeepLearning/48ced0566108e0442422c817fcb7f51f4ca6548d/nlp/sentence_bert/__init__.py -------------------------------------------------------------------------------- /nlp/sentence_bert/sbert/model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from sentence_bert.bert import BertConfig 4 | 5 | from sentence_bert.utils import get_sentence_emb 6 | 7 | 8 | class SBERT: 9 | 10 | def __init__(self, bert_config_file, objective, 11 | pooling_mode='mean', 12 | num_labels=None, 13 | margin=1.): 14 | """ 15 | 16 | :param bert_config_file: path to the bert_config file 17 | :param objective: the fine-tuning objective structure 18 | :param pooling_mode: pooling strategy for the sentence embedding 19 | :param num_labels: number of labels; required for the classification objective 20 | :param margin: margin; required for the triplet objective 21 | """ 22 | assert objective in ('classification', 'regression', 'triplet') 23 | assert pooling_mode in ('mean', 'max', 'cls') 24 | 25 | self.bert_config = BertConfig.from_json_file(bert_config_file) 26 | self.objective = objective 27 | self.num_labels = num_labels 28 | self.margin = margin 29 | self.pooling_mode = pooling_mode 30 | 31 | def __call__(self, inputs_a, inputs_b, is_training, labels=None, inputs_c=None): 32 | """ 33 | 34 | :param inputs_a: inputs of sentence a, as a dict 35 | :keyword input_ids: int32 Tensor [batch_size, max_seq_len] 36 | :keyword seq_len: int32 Tensor [batch_size] 37 | :param inputs_b: inputs of sentence b, same format as inputs_a. For the triplet objective, this is the positive sentence 38 | :param is_training: 39 | :param labels: 40 | :param inputs_c: same format as inputs_a. Only used for the triplet objective, as the negative sentence 41 | :return: 42 | """ 43 | sentence_emb_a = get_sentence_emb(input_ids=inputs_a['input_ids'], 44 | seq_len=inputs_a['seq_len'], 45 | is_training=is_training, 46 | bert_config=self.bert_config, 47 | pooling_mode=self.pooling_mode) 48 | 49 | sentence_emb_b = get_sentence_emb(input_ids=inputs_b['input_ids'], 50 | seq_len=inputs_b['seq_len'], 51 | is_training=is_training, 52 | bert_config=self.bert_config, 53 | pooling_mode=self.pooling_mode) 54 | 55 | loss = None 56 | if self.objective == 'classification': 57 | logits = tf.layers.dense( 58 | inputs=tf.concat([sentence_emb_a, sentence_emb_b, tf.abs(sentence_emb_a - sentence_emb_b)], axis=-1), 59 | units=self.num_labels, 60 | activation=tf.nn.softmax, 61 | kernel_initializer=tf.variance_scaling_initializer() 62 | ) 63 | 64 | prob = tf.argmax(logits, axis=-1) 65 | 66 | if labels is not None: 67 | if labels.shape.ndims == 1:  # class indices rather than one-hot labels 68 | labels = tf.one_hot(labels, self.num_labels) 69 | 70 | epsilon = 1e-8 71 | labels = tf.cast(labels, tf.float32) 72 | loss = labels * tf.log(logits + epsilon) + (1 - labels) * tf.log(1 - logits + epsilon) 73 | loss = tf.negative(loss) 74 | loss = tf.reduce_mean(tf.reduce_sum(loss, axis=-1)) 75 | 76 | return loss, logits, prob 77 | 78 | if self.objective == 'regression': 79 | cos_sim = tf.reduce_sum(tf.nn.l2_normalize(sentence_emb_a, axis=-1) 80 | * tf.nn.l2_normalize(sentence_emb_b, axis=-1), axis=-1)  # row-wise cosine similarity 81 | if labels is not None: 82 | loss = self._mse(cos_sim, labels) 83 | 84 | return loss, cos_sim 85 | 86 | if self.objective == 'triplet': 87 | sentence_emb_c = get_sentence_emb(input_ids=inputs_c['input_ids'], 88 | seq_len=inputs_c['seq_len'], 89 | is_training=is_training, 90 | bert_config=self.bert_config, 91 | pooling_mode=self.pooling_mode) 92 | 93 | negative_distance = self._euclidean(sentence_emb_a, sentence_emb_c) 94 | positive_distance = self._euclidean(sentence_emb_a, sentence_emb_b) 95 | loss = tf.maximum(positive_distance - negative_distance + self.margin, 0)  # pull positives closer than negatives by the margin 96 | loss = tf.reduce_mean(loss) 97 | 98 | return loss, negative_distance, positive_distance 99 | 100 | def _euclidean(self, t1, t2): 101 | """Per-sample Euclidean distance""" 102 | return tf.sqrt(tf.reduce_sum(tf.pow(t1 - t2, 2), axis=-1)) 103 | 104 | def _mse(self, y1, y2): 105 | """MSE loss""" 106 | return tf.reduce_mean(tf.pow(y1 - y2, 2)) 107 | 108 | def sentence_embedding(self, inputs): 109 | """ 110 | Get the sentence embeddings for the inputs 111 | :param inputs: dict 112 | :keyword input_ids: int32 Tensor [batch_size, max_seq_len] 113 | :keyword seq_len: int32 Tensor [batch_size] 114 | :return: 115 | """ 116 | return get_sentence_emb(input_ids=inputs['input_ids'], 117 | seq_len=inputs['seq_len'], 118 | is_training=False, 119 | bert_config=self.bert_config, 120 | pooling_mode=self.pooling_mode) 121 | -------------------------------------------------------------------------------- /nlp/sentence_bert/utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from sentence_bert.bert import BertModel 4 | 5 | 6 | def get_sentence_emb(input_ids, seq_len, is_training, bert_config, pooling_mode): 7 | token_emb, cls_emb = get_bert_output(input_ids, seq_len, is_training, bert_config) 8 | 9 | if pooling_mode == 'mean': 10 | sentence_emb = mean_pooling(token_emb, seq_len) 11 | elif pooling_mode == 'max': 12 | sentence_emb = max_pooling(token_emb, seq_len) 13 | elif pooling_mode == 'cls': 14 | sentence_emb = cls_emb 15 | else: 16 | raise ValueError("error pooling_mode") 17 | 18 | return sentence_emb 19 | 20 | 21 | def get_bert_output(input_ids, seq_len, is_training, bert_config): 22 | max_len = input_ids.shape.as_list()[-1] 23 | input_mask = tf.sequence_mask(seq_len, max_len, dtype=tf.int32) 24 | segment_ids = tf.zeros_like(input_ids) 25 | 26 | # Under the same variable_scope, the BERT model is shared across the different inputs, i.e. the parameters are tied 27 | model = BertModel( 28 | config=bert_config, 29 | is_training=is_training, 30 | input_ids=input_ids, 31 | input_mask=input_mask, 32 | 
token_type_ids=segment_ids, 33 | use_one_hot_embeddings=False, 34 | scope="bert") 35 | return model.get_sequence_output(), model.get_pooled_output() 36 | 37 | 38 | def mean_pooling(token_emb, seq_len): 39 | mask = tf.sequence_mask(seq_len, dtype=tf.float32) 40 | mask = tf.expand_dims(mask, axis=-1) 41 | seq_len = tf.cast(tf.expand_dims(seq_len, axis=-1), tf.float32) 42 | 43 | token_mean_pooling = tf.reduce_sum(token_emb * mask, axis=1) 44 | token_mean_pooling = token_mean_pooling / seq_len 45 | return token_mean_pooling 46 | 47 | 48 | def max_pooling(token_emb, seq_len): 49 | mask = tf.sequence_mask(seq_len, dtype=tf.float32) 50 | mask = (1.0 - mask) * -10000.0 51 | mask = tf.expand_dims(mask, axis=-1) 52 | 53 | token_mean_pooling = tf.reduce_max(token_emb + mask, axis=1) 54 | return token_mean_pooling 55 | -------------------------------------------------------------------------------- /recommendation/README.md: -------------------------------------------------------------------------------- 1 | # 推荐系统 2 | 3 | 项目文件夹:[recommendation](https://github.com/QunBB/DeepLearning/tree/main/recommendation) 4 | (本git的推荐系统使用的是tensorflow1.x,关于tensorflow2.x的实现可前往另外一个[git](https://github.com/QunBB/RecSys)) 5 | 6 | - **ctr训练提速(超大batch size)-CowClip**:[专栏](https://zhuanlan.zhihu.com/p/557451365) 7 | - **基于二叉树的近似最近邻搜索-Annoy**: [专栏](https://zhuanlan.zhihu.com/p/714579473) 8 | 9 | ## 1. Match(召回) 10 | 11 | 项目文件夹:[recommendation/match](https://github.com/QunBB/DeepLearning/tree/main/recommendation/match) 12 | 13 | - **多兴趣召回MIND**: [专栏](https://zhuanlan.zhihu.com/p/463064543) 14 | - **多兴趣召回ComiRec**: [专栏](https://zhuanlan.zhihu.com/p/568781562) 15 | 16 | 17 | 18 | - **深入浅出地理解Youtube DNN推荐模型**: [专栏](https://zhuanlan.zhihu.com/p/405907646) 19 | - **引入对偶增强向量的双塔召回模型**: [专栏](https://zhuanlan.zhihu.com/p/608636233) 20 | 21 | ## 2. Rank(排序) 22 | 23 | 项目文件夹:[recommendation/rank](https://github.com/QunBB/DeepLearning/tree/main/recommendation/rank) 24 | 25 | - **ctr特征重要性建模:FiBiNet&FiBiNet++模型**:[专栏](https://zhuanlan.zhihu.com/p/603262632) 26 | - **ctr预估之FMs系列:FM/FFM/FwFM/FEFM**:[专栏](https://zhuanlan.zhihu.com/p/613030015) 27 | - **ctr预估之DNN系列模型:FNN/PNN/DeepCrossing**:[专栏](https://zhuanlan.zhihu.com/p/623567076) 28 | - **ctr预估之Wide&Deep系列模型:DeepFM/DCN**:[专栏](https://zhuanlan.zhihu.com/p/631668163) 29 | - **ctr预估之Wide&Deep系列(下):NFM/xDeepFM**:[专栏](https://zhuanlan.zhihu.com/p/634584585) 30 | - **CTR特征建模:ContextNet & MaskNet(Twitter在用的排序模型)**:[专栏](https://zhuanlan.zhihu.com/p/660375034) 31 | - **CTR之行为序列建模用户兴趣:DIN**:[专栏](https://zhuanlan.zhihu.com/p/679852484) 32 | - **CTR之行为序列建模用户兴趣:DIEN**:[专栏](https://zhuanlan.zhihu.com/p/685855305) 33 | - **CTR之Session行为序列建模用户兴趣:DSIN**:[专栏](https://zhuanlan.zhihu.com/p/688338754) 34 | - **CTR之行为序列建模用户兴趣:Temporal Interest Network**:[专栏](https://zhuanlan.zhihu.com/p/7832498217) 35 | - **推荐模型中辅助排序损失的作用**:[专栏](https://zhuanlan.zhihu.com/p/10542978888) 36 | - **GwPFM&HMoE: 推荐模型中的维度坍塌&兴趣纠缠**:[专栏](https://zhuanlan.zhihu.com/p/19885938029) 37 | - **推荐系统特征建模: AdaF^2M^2**:[专栏](https://zhuanlan.zhihu.com/p/1903561181152641052) 38 | 39 | ## 3. 
多场景建模(Multi-Domain) 40 | 41 | 项目文件夹:[recommendation/multidomain](https://github.com/QunBB/DeepLearning/tree/main/recommendation/multidomain) 42 | 43 | - **多场景建模: STAR(Star Topology Adaptive Recommender)**:[专栏](https://zhuanlan.zhihu.com/p/717054800) 44 | - **多场景建模(二): SAR-Net(Scenario-Aware Ranking Network)**:[专栏](https://zhuanlan.zhihu.com/p/718704281) 45 | - **多场景多任务建模(三): M2M(Multi-Scenario Multi-Task Meta Learning)**:[专栏](https://zhuanlan.zhihu.com/p/939534954) 46 | - **多场景多任务建模(四): PEPNet(Parameter and Embedding Personalized Network)**:[专栏](https://zhuanlan.zhihu.com/p/4552106145) -------------------------------------------------------------------------------- /recommendation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QunBB/DeepLearning/48ced0566108e0442422c817fcb7f51f4ca6548d/recommendation/__init__.py -------------------------------------------------------------------------------- /recommendation/cow_clip.py: -------------------------------------------------------------------------------- 1 | """ 2 | 《CowClip: Reducing CTR Prediction Model Training Time from 12 hours to 10 minutes on 1 GPU》 3 | 论文地址:https://arxiv.org/abs/2204.06240 4 | 开源地址:https://github.com/bytedance/LargeBatchCTR 5 | """ 6 | import tensorflow as tf 7 | 8 | import os 9 | 10 | # os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' 11 | 12 | 13 | def cow_clip(w, g, ratio=1, ids=None, pos=None, cnts=None, min_w=0.03): 14 | """ 15 | 16 | :param w: embedding变量 17 | :param g: embedding变量的梯度 18 | :param ratio: 论文中的r 19 | :param ids: 去重的ID 20 | :param pos: tf.unique(pos).y 为去重后的ID位置索引 21 | :param cnts: ID的个数统计 22 | :param min_w: 论文中的 \zeta 23 | :return: 24 | """ 25 | values = tf.gather(g.values, tf.unique(pos).y) 26 | clipnorm = tf.norm(tf.gather(w, ids), axis=-1) 27 | 28 | # 与论文一致,clipnorm先乘ratio,再与min_w取max 29 | # 但原作者实现是 clipnorm先与min_w取max,再乘ratio 30 | clipnorm = tf.maximum(ratio * clipnorm, min_w) 31 | 32 | clip_t = clipnorm * tf.cast(cnts, tf.float32) 33 | 34 | l2sum_row = tf.reduce_sum(values * values, axis=-1) 35 | pred = l2sum_row > 0 36 | l2sum_row_safe = tf.where(pred, l2sum_row, tf.ones_like(l2sum_row)) 37 | l2norm_row = tf.sqrt(l2sum_row_safe) 38 | intermediate = values * tf.expand_dims(clip_t, -1) 39 | g_clip = intermediate / tf.expand_dims(tf.maximum(l2norm_row, clip_t), -1) 40 | 41 | # tensorflow中出现多次的ID的梯度是重复存储,所以这里进行复原 42 | g_clip = tf.repeat(g_clip, cnts, axis=0) 43 | indices = tf.repeat(ids, cnts) 44 | 45 | return tf.IndexedSlices(g_clip, indices, g.dense_shape) 46 | 47 | 48 | if __name__ == '__main__': 49 | import numpy as np 50 | 51 | inputs_id = tf.placeholder(tf.int32, [None, 20]) 52 | labels = tf.placeholder(tf.int32, [None]) 53 | embedding_table = tf.get_variable('embedding_table', shape=[1000, 128]) 54 | test_weight = tf.get_variable('test_weight', shape=[1, 128]) 55 | 56 | # 存储ID embedding变量及对应的ID输入 57 | ids_variables_dict = {embedding_table.name: inputs_id} 58 | 59 | dnn_input = tf.reduce_max(tf.nn.embedding_lookup(embedding_table, inputs_id), axis=1) 60 | predictions = tf.layers.dense(dnn_input + test_weight, 1) 61 | predictions = tf.reshape(predictions, [-1]) 62 | predictions = tf.nn.sigmoid(predictions) 63 | 64 | loss = tf.reduce_mean(tf.pow(tf.cast(labels, tf.float32) - predictions, 2)) 65 | 66 | embedding_grad = [] 67 | embedding_var = [] 68 | dense_grad = [] 69 | dense_var = [] 70 | for var, grad in zip(tf.trainable_variables(), tf.gradients(loss, tf.trainable_variables())): 71 | print(var, grad) 72 | # 
如果是经过embedding_lookup的ID类embedding,`梯度类型为IndexedSlices,其他为tensor 73 | if isinstance(grad, tf.IndexedSlices) and var.name in ids_variables_dict: 74 | unique_ids, pos, ids_count = tf.unique_with_counts(tf.reshape(ids_variables_dict[var.name], [-1])) 75 | clip_grad = cow_clip(var, grad, pos=pos, ids=unique_ids, cnts=ids_count) 76 | embedding_grad.append(clip_grad) 77 | embedding_var.append(var) 78 | else: 79 | dense_grad.append(grad) 80 | dense_var.append(var) 81 | 82 | optimizer = tf.train.AdamOptimizer() 83 | train_op = optimizer.apply_gradients(zip(embedding_grad + dense_grad, embedding_var + dense_var)) 84 | 85 | ######################## 测试程序 ######################## 86 | # sess = tf.Session() 87 | # sess.run(tf.global_variables_initializer()) 88 | # 89 | # feed_dict = { 90 | # # inputs_id: np.random.randint(0, 20, [1, 20]), 91 | # inputs_id: [[14, 9, 5, 1, 3, 12, 14, 11, 12, 6, 1, 16, 13, 6, 16, 19, 13, 0, 3, 19]], 92 | # labels: np.random.randint(0, 10, [10]) 93 | # } 94 | # 95 | # # print(sess.run(embedding_grad, feed_dict=feed_dict)) 96 | # print(sess.run(tf.unique_with_counts(tf.reshape(inputs_id, [-1])), feed_dict=feed_dict)) 97 | # print(sess.run(inputs_id, feed_dict=feed_dict)) 98 | # grad_v, grad_i = sess.run([embedding_grad[0].values, embedding_grad[0].indices], feed_dict=feed_dict) 99 | # print(grad_v.shape, grad_v[0], grad_v[6], grad_v[1]) 100 | # print(grad_i.shape, grad_i) 101 | # 102 | # for _ in range(1000): 103 | # _, _loss = sess.run([train_op, loss], 104 | # feed_dict={ 105 | # inputs_id: np.random.randint(0, 20, [32, 20]), 106 | # labels: np.random.randint(0, 1, [32]) 107 | # } 108 | # ) 109 | # print(_loss) 110 | -------------------------------------------------------------------------------- /recommendation/multidomain/README.md: -------------------------------------------------------------------------------- 1 | # `_unittest.py`包含所有模型的测试用例 -------------------------------------------------------------------------------- /recommendation/rank/README.md: -------------------------------------------------------------------------------- 1 | # `_unittest.py`包含所有模型的测试用例 -------------------------------------------------------------------------------- /recommendation/rank/deepcrossing.py: -------------------------------------------------------------------------------- 1 | """ 2 | 论文:Deep Crossing - Web-Scale Modeling without Manually Crafted Combinatorial Features 3 | 4 | 地址:https://www.kdd.org/kdd2016/papers/files/adf0975-shanA.pdf 5 | """ 6 | from typing import List, Union 7 | import tensorflow as tf 8 | 9 | from ..utils.core import dnn_layer 10 | 11 | 12 | class DeepCrossing: 13 | def __init__(self, 14 | residual_size: List[int], 15 | l2_reg: float = 0., 16 | dropout: float = 0., 17 | use_bn: bool = True 18 | ): 19 | """ 20 | 21 | :param residual_size: 每一层残差网络的中间层size 22 | :param l2_reg: 23 | :param dropout: 24 | :param use_bn: 25 | """ 26 | self.residual_size = residual_size 27 | self.l2_reg = l2_reg 28 | self.dropout = dropout 29 | self.use_bn = use_bn 30 | 31 | def __call__(self, 32 | embeddings: Union[List[tf.Tensor], tf.Tensor], 33 | is_training: bool = True): 34 | if isinstance(embeddings, list): 35 | embeddings = tf.concat(embeddings, axis=-1) 36 | 37 | if embeddings.shape.ndims != 2: 38 | raise ValueError('Input tensor must have rank 2') 39 | 40 | residual_output = embeddings 41 | for size in self.residual_size: 42 | residual_output = self.residual_layer(residual_output, size, is_training) 43 | 44 | output = tf.layers.dense(residual_output, 1, 
activation=tf.nn.sigmoid, 45 | kernel_regularizer=tf.contrib.layers.l2_regularizer(self.l2_reg), 46 | kernel_initializer=tf.glorot_normal_initializer()) 47 | 48 | return tf.reshape(output, [-1]) 49 | 50 | def residual_layer(self, inputs, hidden_units, is_training): 51 | dim = inputs.shape.as_list()[-1] 52 | layer_output = dnn_layer(inputs, hidden_units=[hidden_units, dim], 53 | dropout=self.dropout, 54 | activation=tf.nn.relu, 55 | use_bn=self.use_bn, 56 | l2_reg=self.l2_reg, 57 | is_training=is_training) 58 | return inputs + layer_output 59 | -------------------------------------------------------------------------------- /recommendation/rank/deepfm.py: -------------------------------------------------------------------------------- 1 | """ 2 | > 论文:DeepFM: A Factorization-Machine based Neural Network for CTR Prediction 3 | > 4 | > 地址:https://www.ijcai.org/proceedings/2017/0239.pdf 5 | """ 6 | import tensorflow as tf 7 | from typing import List, Optional, Callable 8 | from typing import Dict as OrderedDictType 9 | from functools import partial 10 | 11 | from .fms import FMs 12 | from ..utils.type_declaration import LinearTerms, FMType, Field 13 | from ..utils.core import dnn_layer 14 | 15 | 16 | class DeepFM: 17 | def __init__(self, 18 | fields_list: List[Field], 19 | dnn_hidden_units: List[int], 20 | dnn_activation: Optional[Callable] = None, 21 | dnn_dropout: Optional[float] = 0., 22 | dnn_use_bn: Optional[bool] = True, 23 | dnn_l2_reg: float = 0., 24 | linear_type: LinearTerms = LinearTerms.LW, 25 | model_type: FMType = FMType.FM, 26 | emb_l2_reg: float = 0. 27 | ): 28 | self.fm = FMs(fields_list, linear_type, model_type, emb_l2_reg) 29 | 30 | self.dnn_layer = partial(dnn_layer, hidden_units=dnn_hidden_units, activation=dnn_activation, 31 | dropout=dnn_dropout, use_bn=dnn_use_bn, l2_reg=dnn_l2_reg) 32 | self.dnn_l2_reg = dnn_l2_reg 33 | 34 | def __call__(self, sparse_inputs_dict: OrderedDictType[str, tf.Tensor], 35 | dense_inputs_dict: OrderedDictType[str, tf.Tensor], 36 | is_training: bool = True): 37 | """ 38 | 未经过embedding layer的输入 39 | :param sparse_inputs_dict: 离散特征,经过LabelEncoder之后的输入 40 | :param dense_inputs_dict: 连续值特征 41 | :return: 42 | """ 43 | fm_logit = self.fm(sparse_inputs_dict, dense_inputs_dict, add_sigmoid=False) 44 | 45 | embedding_output = self.fm.get_embedding_output() 46 | embedding_output = tf.concat(embedding_output, axis=1) 47 | 48 | dnn_output = self.dnn_layer(embedding_output, is_training=is_training) 49 | dnn_logit = tf.layers.dense(dnn_output, 1, 50 | kernel_regularizer=tf.contrib.layers.l2_regularizer(self.dnn_l2_reg), 51 | kernel_initializer=tf.glorot_normal_initializer()) 52 | 53 | output = tf.nn.sigmoid(fm_logit + tf.squeeze(dnn_logit, axis=1)) 54 | 55 | return output 56 | -------------------------------------------------------------------------------- /recommendation/rank/fibinet.py: -------------------------------------------------------------------------------- 1 | """ 2 | 论文:FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction 3 | 4 | 地址:https://arxiv.org/abs/1905.09433 5 | 6 | 论文:FiBiNet++:Improving FiBiNet by Greatly Reducing Model Size for CTR Prediction 7 | 8 | 地址:https://arxiv.org/pdf/2209.05016.pdf 9 | """ 10 | import tensorflow as tf 11 | from typing import List, Callable 12 | 13 | from ..utils.core import dnn_layer 14 | from ..utils.interaction import SENet, BiLinear 15 | 16 | 17 | class FiBiNet: 18 | def __init__(self, 19 | dnn_units: List[int], 20 | dropout: float, 21 | reduction_ratio: 
int, 22 | num_groups: int, 23 | bilinear_output_size: int, 24 | bilinear_type: str, 25 | dnn_activation: Callable = tf.nn.relu, 26 | dnn_use_bn: bool = True, 27 | dnn_l2_reg: float = 0., 28 | bilinear_plus: bool = True, 29 | equal_dim: bool = True): 30 | """ 31 | FiBiNet和FiBiNet++模型,支持不同field embeddings的size不等 32 | :param dnn_units: MLP层的隐藏层size列表 33 | :param dropout: 34 | :param reduction_ratio: SENet中的缩减比率 35 | :param num_groups: SENet+ embedding分割的group数目 36 | :param bilinear_output_size: 双线性交互层的输出size 37 | :param bilinear_type: 双线性交互类型,['all', 'each', 'interaction'],支持其中一种 38 | :param dnn_activation: MLP层的激活函数 39 | :param dnn_use_bn: MLP层是否使用normalization 40 | :param dnn_l2_reg: MLP层的参数正则化 41 | :param bilinear_plus: 是否使用bi-linear+ 42 | :param equal_dim: 所有field的embeddings的size是否相同 43 | """ 44 | self.dnn_units = dnn_units 45 | self.dnn_activation = dnn_activation 46 | self.dnn_use_bn = dnn_use_bn 47 | self.dnn_l2_reg = dnn_l2_reg 48 | self.dropout = dropout 49 | self.bilinear = BiLinear(output_size=bilinear_output_size, 50 | bilinear_type=bilinear_type, 51 | bilinear_plus=bilinear_plus, 52 | equal_dim=equal_dim) 53 | self.senet = SENet(reduction_ratio=reduction_ratio, 54 | num_groups=num_groups) 55 | 56 | def __call__(self, sparse_embeddings_list: List[tf.Tensor], 57 | dense_embeddings_list: List[tf.Tensor], 58 | is_training: bool = True): 59 | sparse_embeddings_list = [tf.contrib.layers.layer_norm(inputs=emb, 60 | begin_norm_axis=-1, 61 | begin_params_axis=-1, 62 | scope=f'sparse_ln_{i}') 63 | for i, emb in enumerate(sparse_embeddings_list)] 64 | dense_embeddings_list = [tf.layers.batch_normalization(inputs=emb, name=f'dense_bn_{i}', training=is_training) 65 | for i, emb in enumerate(dense_embeddings_list)] 66 | senet_output = self.senet(sparse_embeddings_list + dense_embeddings_list) 67 | bilinear_output = self.bilinear(sparse_embeddings_list + dense_embeddings_list) 68 | 69 | output = dnn_layer(inputs=tf.concat([senet_output, bilinear_output], axis=-1), 70 | is_training=is_training, 71 | hidden_units=self.dnn_units, 72 | activation=self.dnn_activation, 73 | dropout=self.dropout, 74 | use_bn=self.dnn_use_bn, 75 | l2_reg=self.dnn_l2_reg) 76 | output = tf.layers.dense(output, 1, activation=tf.nn.sigmoid, 77 | kernel_regularizer=tf.contrib.layers.l2_regularizer(self.dnn_l2_reg), 78 | kernel_initializer=tf.glorot_normal_initializer()) 79 | return tf.reshape(output, [-1]) 80 | -------------------------------------------------------------------------------- /recommendation/rank/fms.py: -------------------------------------------------------------------------------- 1 | """ 2 | 论文:Factorization Machines 3 | 地址:https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf 4 | 5 | 论文:Field-weighted Factorization Machines for Click-Through Rate Prediction in Display Advertising 6 | 地址:https://arxiv.org/pdf/1806.03514.pdf 7 | 8 | 论文:Field-Embedded Factorization Machines for Click-through rate prediction 9 | 地址:https://arxiv.org/pdf/2009.09931.pdf 10 | """ 11 | import itertools 12 | from typing import Dict as OrderedDictType 13 | from typing import List 14 | 15 | import tensorflow as tf 16 | 17 | from ..utils.interaction import LinearEmbedding 18 | from ..utils.type_declaration import LinearTerms, FMType, Field 19 | 20 | 21 | class FMs: 22 | def __init__(self, 23 | fields_list: List[Field], 24 | linear_type: LinearTerms = LinearTerms.LW, 25 | model_type: FMType = FMType.FM, 26 | l2_reg: float = 0.): 27 | self.num_fields = len(fields_list) 28 | 29 | embedding_dim = fields_list[0].dim # 所有field 
embeddings的维度应该相同 30 | 31 | if model_type == FMType.FwFM: 32 | self.interaction_strengths = tf.get_variable('interaction_strengths', 33 | shape=[self.num_fields, self.num_fields], 34 | regularizer=tf.contrib.layers.l2_regularizer(l2_reg)) 35 | self.interaction_func = self._fwfm_interaction 36 | elif model_type == FMType.FEFM: 37 | self.interaction_strengths = tf.get_variable('interaction_strengths', 38 | shape=[self.num_fields, self.num_fields, embedding_dim, embedding_dim], 39 | regularizer=tf.contrib.layers.l2_regularizer(l2_reg)) 40 | self.interaction_func = self._fefm_interaction 41 | else: 42 | self.interaction_func = self._fm_interaction 43 | 44 | # For DeepFM 45 | self._embedding_output = [] 46 | 47 | self.linear = LinearEmbedding(fields_list, linear_type) 48 | 49 | self.global_w = tf.get_variable('global_w', shape=[1], dtype=tf.float32, 50 | initializer=tf.zeros_initializer()) 51 | 52 | def __call__(self, 53 | sparse_inputs_dict: OrderedDictType[str, tf.Tensor], 54 | dense_inputs_dict: OrderedDictType[str, tf.Tensor], 55 | add_sigmoid: bool = True): 56 | """ 57 | 未经过embedding layer的输入 58 | :param sparse_inputs_dict: 离散特征,经过LabelEncoder之后的输入 59 | :param dense_inputs_dict: 60 | :return: 61 | """ 62 | assert self.num_fields == len(sparse_inputs_dict) + len(dense_inputs_dict) 63 | 64 | embeddings, linear_logit = self.linear(sparse_inputs_dict, dense_inputs_dict) 65 | 66 | self._embedding_output = embeddings 67 | 68 | fms_logit = self.interaction_func(embeddings) 69 | 70 | final_logit = linear_logit + fms_logit + self.global_w 71 | 72 | if add_sigmoid: 73 | final_logit = tf.nn.sigmoid(final_logit) 74 | 75 | return final_logit 76 | 77 | def _fm_interaction(self, interactions: List[tf.Tensor]): 78 | interactions = tf.stack(interactions, axis=1) 79 | square_of_sum = tf.square(tf.reduce_sum( 80 | interactions, axis=1, keep_dims=True)) 81 | sum_of_square = tf.reduce_sum( 82 | interactions * interactions, axis=1, keep_dims=True) 83 | fm_logit = square_of_sum - sum_of_square 84 | fm_logit = 0.5 * tf.reduce_sum(fm_logit, axis=2, keep_dims=False) 85 | 86 | return tf.reshape(fm_logit, [-1]) 87 | 88 | def _fwfm_interaction(self, interactions: List[tf.Tensor]): 89 | logits = [] 90 | for i, j in itertools.combinations(range(self.num_fields), 2): 91 | r_ij = self.interaction_strengths[i, j] 92 | vx_i = interactions[i] 93 | vx_j = interactions[j] 94 | logits.append(tf.reduce_sum(r_ij * vx_i * vx_j, axis=1)) 95 | 96 | return tf.add_n(logits) 97 | 98 | def _fefm_interaction(self, interactions: List[tf.Tensor]): 99 | logits = [] 100 | for i, j in itertools.combinations(range(self.num_fields), 2): 101 | w_ij = self.interaction_strengths[i, j] 102 | vx_i = interactions[i] 103 | vx_j = interactions[j] 104 | 105 | _logit = tf.matmul(tf.matmul(tf.expand_dims(vx_i, axis=1), w_ij), 106 | tf.expand_dims(vx_j, axis=2)) 107 | logits.append(tf.reshape(_logit, [-1])) 108 | 109 | return tf.add_n(logits) 110 | 111 | def get_embedding_output(self): 112 | return self._embedding_output 113 | -------------------------------------------------------------------------------- /recommendation/rank/fnn.py: -------------------------------------------------------------------------------- 1 | """ 2 | 论文:Deep Learning over Multi-field Categorical Data: A Case Study on User Response Prediction 3 | 4 | 地址:https://arxiv.org/pdf/1601.02376.pdf 5 | """ 6 | from typing import Dict as OrderedDictType 7 | from typing import List, Union 8 | 9 | import tensorflow as tf 10 | from dataclasses import dataclass 11 | 12 | from ..utils.core import 
dnn_layer 13 | from ..utils.train_utils import get_assignment_map_from_checkpoint 14 | 15 | 16 | @dataclass 17 | class Field: 18 | name: str 19 | vocabulary_size: int = 1 # dense类型为1 20 | 21 | 22 | class FNN: 23 | def __init__(self, 24 | fields_list: List[Field], 25 | embedding_dim: int, 26 | dnn_hidden_units: List[int], 27 | dropout: float = 0., 28 | l2_reg: float = 0., 29 | use_bn: bool = False, 30 | dnn_l2_reg: float = 0., 31 | fms_checkpoint: str = None 32 | ): 33 | """ 34 | 35 | :param fields_list: 36 | :param embedding_dim: 特征向量的dim 37 | :param dnn_hidden_units: 隐藏层的size列表 38 | :param dropout: 39 | :param l2_reg: 特征向量的l2正则项力度 40 | :param use_bn: 全连接层是否使用batch_normalization 41 | :param dnn_l2_reg: 全连接层的权重l2正则项力度 42 | :param fms_checkpoint: 预训练的FMs模型路径 43 | """ 44 | self.embeddings_table = {} 45 | self.weights_table = {} 46 | self.num_fields = len(fields_list) 47 | 48 | for field in fields_list: 49 | # embeddings 隐向量 50 | self.embeddings_table[field.name] = tf.get_variable('emb_' + field.name, 51 | shape=[field.vocabulary_size, embedding_dim], 52 | regularizer=tf.contrib.layers.l2_regularizer(l2_reg)) 53 | 54 | # 线性项权重 55 | self.weights_table[field.name] = tf.get_variable('w_' + field.name, shape=[field.vocabulary_size], 56 | regularizer=tf.contrib.layers.l2_regularizer(l2_reg)) 57 | 58 | self.dnn_hidden_units = dnn_hidden_units 59 | self.dropout = dropout 60 | self.dnn_l2_reg = dnn_l2_reg 61 | self.use_bn = use_bn 62 | 63 | # 加载预训练的FMs模型向量 64 | if fms_checkpoint: 65 | tvars = tf.trainable_variables() 66 | assignment_map, initialized_variable_names = get_assignment_map_from_checkpoint(tvars, fms_checkpoint) 67 | tf.train.init_from_checkpoint(fms_checkpoint, assignment_map) 68 | tf.logging.info("**** Trainable Variables ****") 69 | for var in tvars: 70 | init_string = "" 71 | if var.name in initialized_variable_names: 72 | init_string = ", *INIT_FROM_CKPT*" 73 | tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, 74 | init_string) 75 | 76 | def _get_linear_logit(self, 77 | w: tf.Tensor, 78 | i: Union[int, tf.Tensor] = 0, 79 | x: Union[int, tf.Tensor] = 1, 80 | ): 81 | """线性项计算""" 82 | 83 | return tf.gather(w, i) * x 84 | 85 | def __call__(self, 86 | sparse_inputs_dict: OrderedDictType[str, tf.Tensor], 87 | dense_inputs_dict: OrderedDictType[str, tf.Tensor], 88 | is_training: bool = True): 89 | dense_real_layer = [] 90 | 91 | # 类别特征: (w, v) 92 | for name, x in sparse_inputs_dict.items(): 93 | v = tf.nn.embedding_lookup(self.embeddings_table[name], x) 94 | w = self._get_linear_logit(w=self.weights_table[name], i=x) 95 | dense_real_layer.append(tf.concat([tf.expand_dims(w, axis=1), v], axis=1)) 96 | 97 | # 数值特征: v * w 98 | for name, x in dense_inputs_dict.items(): 99 | v = tf.reshape(self.embeddings_table[name][0], [1, -1]) 100 | w = self._get_linear_logit(w=self.weights_table[name], x=x) 101 | dense_real_layer.append(tf.expand_dims(w, axis=1) * v) 102 | 103 | dense_real_layer = tf.concat(dense_real_layer, axis=1) 104 | 105 | output = dnn_layer(dense_real_layer, self.dnn_hidden_units, activation=tf.nn.tanh, 106 | is_training=is_training, use_bn=self.use_bn, l2_reg=self.dnn_l2_reg, dropout=self.dropout) 107 | 108 | output = tf.layers.dense(output, 1, activation=tf.nn.sigmoid, 109 | kernel_regularizer=tf.contrib.layers.l2_regularizer(self.dnn_l2_reg), 110 | kernel_initializer=tf.glorot_normal_initializer()) 111 | 112 | return tf.reshape(output, [-1]) 113 | -------------------------------------------------------------------------------- /recommendation/rank/hmoe.py: 
--------------------------------------------------------------------------------
  1 | """
  2 | Ads Recommendation in a Collapsed and Entangled World
  3 |
  4 | KDD'2024: https://arxiv.org/abs/2403.00793
  5 | """
  6 | from collections import defaultdict
  7 | from functools import partial
  8 | from typing import Optional, Callable, List, Dict, Any
  9 |
 10 | import tensorflow as tf
 11 |
 12 | from .interaction_expert import Expert
 13 | from ..utils.core import dnn_layer
 14 |
 15 |
 16 | class HMoE:
 17 |     def __init__(self,
 18 |                  expert_group: Dict[str, Dict[Expert, Dict[str, Any]]],
 19 |                  dnn_hidden_units: List[int],
 20 |                  gate_weighted: bool = False,
 21 |                  sum_weighted: bool = False,
 22 |                  dropout: float = 0.,
 23 |                  l2_reg: float = 0.,
 24 |                  dnn_activation: Optional[Callable] = tf.nn.relu,
 25 |                  dnn_use_bn: bool = True,
 26 |                  ):
 27 |         """Heterogeneous Mixture-of-Experts with Multi-Embedding
 28 |
 29 |         :param expert_group: expert networks per group; the key is the group name, the value maps an expert-network class to its constructor kwargs
 30 |         :param dnn_hidden_units: size of each DNN hidden layer
 31 |         :param gate_weighted: whether to aggregate the expert outputs with a gated (softmax-weighted) sum
 32 |         :param sum_weighted: whether to aggregate the expert outputs with a simple learned weighted sum
 33 |         :param dropout:
 34 |         :param l2_reg: regularization strength
 35 |         :param dnn_activation: activation function
 36 |         :param dnn_use_bn: whether to use batch normalization
 37 |         """
 38 |         num_experts = 0
 39 |         self.expert_group = defaultdict(list)
 40 |         # instantiate the expert networks
 41 |         for group in expert_group:
 42 |             for expert in expert_group[group]:
 43 |                 self.expert_group[group].append(
 44 |                     expert.init_layer(**expert_group[group][expert])
 45 |                 )
 46 |                 num_experts += 1
 47 |
 48 |         self.dnn_layer = partial(dnn_layer, hidden_units=dnn_hidden_units, activation=dnn_activation,
 49 |                                  dropout=dropout, use_bn=dnn_use_bn, l2_reg=l2_reg)
 50 |
 51 |         self.gate_weighted = gate_weighted
 52 |         self.sum_weighted = sum_weighted
 53 |         if sum_weighted:
 54 |             self.weights = tf.get_variable("weight", shape=[1, num_experts, 1], initializer=tf.initializers.ones())
 55 |
 56 |     def __call__(self,
 57 |                  group_embeddings: Dict[str, List[tf.Tensor]],
 58 |                  is_training: bool = True):
 59 |         """
 60 |
 61 |         :param group_embeddings: per-group inputs after the embedding layer; the groups must match `expert_group`, and each group shares one embedding table
 62 |         :param is_training:
 63 |         :return:
 64 |         """
 65 |         experts_output = defaultdict(list)
 66 |         # expert interactions within each group
 67 |         for group in self.expert_group:
 68 |             embeddings = group_embeddings[group]
 69 |             for i, expert in enumerate(self.expert_group[group]):
 70 |                 # interaction output of the expert network
 71 |                 interaction = expert(embeddings, is_training=is_training)
 72 |                 # followed by an MLP
 73 |                 interaction = self.dnn_layer(interaction, is_training=is_training, scope=f"{group}_expert_{i}")
 74 |                 experts_output[group].append(interaction)
 75 |
 76 |         final_interaction = []
 77 |         for group in group_embeddings:
 78 |             final_interaction.extend(experts_output[group])
 79 |         # aggregate the experts
 80 |         if self.gate_weighted:  # gated weighted sum
 81 |             scores = []
 82 |             for group in group_embeddings:
 83 |                 scores.append(
 84 |                     # the gate input must be a single tensor, so concatenate the group's embedding list first
 85 |                     tf.layers.dense(tf.concat(group_embeddings[group], axis=-1), len(self.expert_group[group]))
 86 |                 )
 87 |
 88 |             scores = tf.nn.softmax(tf.concat(scores, axis=-1), axis=-1)
 89 |             final_interaction = tf.squeeze(
 90 |                 tf.matmul(tf.expand_dims(scores, axis=1), tf.stack(final_interaction, axis=1)),
 91 |                 axis=1)
 92 |         elif self.sum_weighted:  # simple learned weighted sum
 93 |             final_interaction = tf.reduce_sum(
 94 |                 tf.stack(final_interaction, axis=1) * self.weights,
 95 |                 axis=1
 96 |             )
 97 |         else:  # simple element-wise sum
 98 |             final_interaction = sum(final_interaction)
 99 |
100 |         output = tf.layers.dense(final_interaction, 1, activation=tf.nn.sigmoid,
101 |                                  kernel_initializer=tf.glorot_normal_initializer())
102 |
103 |         return tf.reshape(output, [-1])
104 |
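The gate-weighted branch above reduces the list of expert outputs to a single tensor with a softmax gate computed from the concatenated group embeddings. A minimal standalone sketch of just that aggregation step (TF1-style; batch size 32, 3 experts of width 16 and a 24-dim gate input are hypothetical shapes):

import tensorflow as tf

# expert outputs: a list of [batch, d] tensors
experts = [tf.random_normal([32, 16]) for _ in range(3)]
# gate input: concatenated group embeddings (hypothetical 24-dim)
gate_in = tf.random_normal([32, 24])

scores = tf.nn.softmax(tf.layers.dense(gate_in, len(experts)), axis=-1)  # [32, 3]
stacked = tf.stack(experts, axis=1)                                      # [32, 3, 16]
# [32, 1, 3] @ [32, 3, 16] -> [32, 1, 16] -> [32, 16]
weighted = tf.squeeze(tf.matmul(tf.expand_dims(scores, axis=1), stacked), axis=1)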
-------------------------------------------------------------------------------- /recommendation/rank/interaction_expert.py: --------------------------------------------------------------------------------
 1 | from enum import Enum
 2 |
 3 | from .dcn import CrossNetwork
 4 | from .masknet import SerialMaskNet, ParallelMaskNet
 5 | from .pnn import InnerProduct, OuterProduct
 6 | from .contextnet import ContextBlock
 7 | from ..utils.interaction import SENet, BiLinear
 8 |
 9 |
10 | class Expert(Enum):
11 |     """
12 |     Enumeration of the expert interaction networks
13 |     """
14 |     CrossNetwork = CrossNetwork
15 |     InnerProduct = InnerProduct
16 |     OuterProduct = OuterProduct
17 |     SerialMaskNet = SerialMaskNet
18 |     ParallelMaskNet = ParallelMaskNet
19 |     ContextBlock = ContextBlock
20 |     SENet = SENet
21 |     BiLinear = BiLinear
22 |
23 |     def init_layer(self, *args, **kwargs):
24 |         return self.value(*args, **kwargs)
25 |
-------------------------------------------------------------------------------- /recommendation/rank/nfm.py: --------------------------------------------------------------------------------
 1 | """
 2 | > Paper: Neural Factorization Machines for Sparse Predictive Analytics
 3 | >
 4 | > Link: https://arxiv.org/pdf/1708.05027.pdf
 5 | """
 6 | from functools import partial
 7 | from typing import Dict as OrderedDictType
 8 | from typing import List, Callable
 9 |
10 | import tensorflow as tf
11 |
12 | from ..utils.core import dnn_layer
13 | from ..utils.interaction import LinearEmbedding
14 | from ..utils.type_declaration import LinearTerms, Field
15 |
16 |
17 | class NFM:
18 |     def __init__(self,
19 |                  fields_list: List[Field],
20 |                  dnn_hidden_units: List[int],
21 |                  dnn_activation: Callable = None,
22 |                  dnn_dropout: float = 0.,
23 |                  dnn_use_bn: bool = True,
24 |                  dnn_l2_reg: float = 0.,
25 |                  linear_type: LinearTerms = LinearTerms.LW):
26 |         self.num_fields = len(fields_list)
27 |
28 |         self.dnn_layer = partial(dnn_layer, hidden_units=dnn_hidden_units, activation=dnn_activation,
29 |                                  dropout=dnn_dropout, use_bn=dnn_use_bn, l2_reg=dnn_l2_reg)
30 |         self.dnn_l2_reg = dnn_l2_reg
31 |
32 |         self.linear = LinearEmbedding(fields_list, linear_type)
33 |
34 |         self.global_w = tf.get_variable('global_w', shape=[1], dtype=tf.float32,
35 |                                         initializer=tf.zeros_initializer())
36 |
37 |     def __call__(self,
38 |                  sparse_inputs_dict: OrderedDictType[str, tf.Tensor],
39 |                  dense_inputs_dict: OrderedDictType[str, tf.Tensor],
40 |                  is_training: bool = True):
41 |         """
42 |         Inputs that have not been through the embedding layer
43 |         :param sparse_inputs_dict: sparse (categorical) features, LabelEncoder-encoded
44 |         :param dense_inputs_dict: dense (continuous-valued) features
45 |         :return:
46 |         """
47 |         embeddings, linear_logit = self.linear(sparse_inputs_dict, dense_inputs_dict)
48 |
49 |         bi_interaction_output = self._bi_interaction_layer(embeddings)
50 |
51 |         dnn_output = self.dnn_layer(bi_interaction_output, is_training=is_training)
52 |
53 |         dnn_output = tf.layers.dense(dnn_output, 1,
54 |                                      kernel_regularizer=tf.contrib.layers.l2_regularizer(self.dnn_l2_reg),
55 |                                      kernel_initializer=tf.glorot_normal_initializer())
56 |         dnn_output = tf.reshape(dnn_output, [-1])
57 |
58 |         final_logit = tf.nn.sigmoid(linear_logit + dnn_output + self.global_w)
59 |
60 |         return final_logit
61 |
62 |     def _bi_interaction_layer(self, interactions):
63 |         interactions = tf.stack(interactions, axis=1)
64 |         square_of_sum = tf.square(tf.reduce_sum(
65 |             interactions, axis=1))
66 |         sum_of_square = tf.reduce_sum(
67 |             interactions * interactions, axis=1)
68 |         fm_logit = square_of_sum - sum_of_square
69 |
70 |         return 0.5 * fm_logit
71 |
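Both `_bi_interaction_layer` here and `_fm_interaction` in fms.py rely on the FM identity sum_{i<j} v_i ⊙ v_j = 0.5 * ((sum_i v_i)^2 - sum_i (v_i)^2), which replaces the quadratic pairwise sum with two linear-time reductions. A quick numpy check with toy shapes (5 fields, embedding dim 4):

import numpy as np

np.random.seed(0)
v = np.random.randn(5, 4)  # 5 fields, embedding dim 4

pairwise = sum(v[i] * v[j] for i in range(5) for j in range(i + 1, 5))
shortcut = 0.5 * (np.square(v.sum(axis=0)) - np.square(v).sum(axis=0))

assert np.allclose(pairwise, shortcut)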
-------------------------------------------------------------------------------- /recommendation/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QunBB/DeepLearning/48ced0566108e0442422c817fcb7f51f4ca6548d/recommendation/utils/__init__.py -------------------------------------------------------------------------------- /recommendation/utils/core.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from typing import List, Callable, Optional, Union 3 | 4 | from tensorflow.python.keras import activations 5 | 6 | 7 | def dnn_layer(inputs: tf.Tensor, 8 | hidden_units: Union[List[int], int], 9 | activation: Optional[Union[Callable, str]] = None, 10 | dropout: Optional[float] = 0., 11 | is_training: Optional[bool] = True, 12 | use_bn: Optional[bool] = True, 13 | l2_reg: float = 0., 14 | use_bias: bool = True, 15 | scope=None): 16 | if isinstance(hidden_units, int): 17 | hidden_units = [hidden_units] 18 | 19 | output = inputs 20 | for idx, size in enumerate(hidden_units): 21 | output = tf.layers.dense(output, size, 22 | use_bias=use_bias, 23 | kernel_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), 24 | kernel_initializer=tf.glorot_normal_initializer(), 25 | name=scope+f'_{idx}' if scope else None) 26 | if use_bn: 27 | output = tf.layers.batch_normalization(output, training=is_training, name=scope+f'_bn_{idx}' if scope else None) 28 | 29 | if activation is not None: 30 | output = activation_layer(activation, is_training=is_training, scope=f'activation_layer_{idx}')(output) 31 | 32 | if is_training: 33 | output = tf.nn.dropout(output, 1 - dropout) 34 | 35 | return output 36 | 37 | 38 | def activation_layer(activation: Union[Callable, str], 39 | scope: Optional[str] = None, 40 | is_training: bool = True): 41 | if isinstance(activation, str): 42 | if activation.lower() == 'dice': 43 | return lambda x: dice(x, is_training, scope if scope else '') 44 | elif activation.lower() == 'prelu': 45 | return lambda x: prelu(x, scope if scope else '') 46 | else: 47 | return activations.get(activation) 48 | else: 49 | if activation is dice: 50 | return lambda x: dice(x, is_training, scope if scope else '') 51 | elif activation is prelu: 52 | return lambda x: prelu(x, scope if scope else '') 53 | else: 54 | return activation 55 | 56 | 57 | def dice(_x, is_training, name=''): 58 | with tf.variable_scope(name_or_scope=name): 59 | alphas = tf.get_variable('alpha', _x.get_shape()[-1], 60 | initializer=tf.constant_initializer(0.0), 61 | dtype=tf.float32) 62 | beta = tf.get_variable('beta', _x.get_shape()[-1], 63 | initializer=tf.constant_initializer(0.0), 64 | dtype=tf.float32) 65 | 66 | x_normed = tf.layers.batch_normalization(_x, center=False, scale=False, name=name, training=is_training) 67 | x_p = tf.sigmoid(beta * x_normed) 68 | 69 | return alphas * (1.0 - x_p) * _x + x_p * _x 70 | 71 | 72 | def prelu(_x, scope=''): 73 | with tf.variable_scope(name_or_scope=scope, reuse=tf.AUTO_REUSE): 74 | alphas = tf.get_variable('alpha', _x.get_shape()[-1], 75 | initializer=tf.constant_initializer(0.0), 76 | dtype=tf.float32) 77 | pos = tf.nn.relu(_x) 78 | neg = alphas * (_x - abs(_x)) * 0.5 79 | 80 | return pos + neg 81 | -------------------------------------------------------------------------------- /recommendation/utils/losses.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def 
ranking_loss(y_true, y_pred): 5 | """Only compute pairs loss of positive v.s. negative samples. 6 | """ 7 | 8 | z_ij = tf.reshape(y_pred, [-1, 1]) - tf.reshape(y_pred, [1, -1]) 9 | 10 | y_true = tf.convert_to_tensor(y_true, dtype="int64") 11 | mask = tf.logical_and(tf.equal(tf.reshape(y_true, [-1, 1]), 1), 12 | tf.equal(tf.reshape(y_true, [1, -1]), 0)) 13 | mask = tf.cast(mask, z_ij.dtype) 14 | 15 | per_pair_loss = tf.losses.sigmoid_cross_entropy(tf.ones_like(z_ij, z_ij.dtype), z_ij, 16 | reduction=tf.losses.Reduction.NONE) 17 | 18 | num_pairs = tf.reduce_sum(mask) 19 | 20 | return tf.reduce_sum(per_pair_loss * mask) / (num_pairs + 1e-7) 21 | -------------------------------------------------------------------------------- /recommendation/utils/train_utils.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import re 3 | import tensorflow as tf 4 | 5 | 6 | def get_assignment_map_from_checkpoint(tvars, init_checkpoint): 7 | """Compute the union of the current variables and checkpoint variables.""" 8 | initialized_variable_names = {} 9 | 10 | name_to_variable = collections.OrderedDict() 11 | for var in tvars: 12 | name = var.name 13 | m = re.match("^(.*):\\d+$", name) 14 | if m is not None: 15 | name = m.group(1) 16 | name_to_variable[name] = var 17 | 18 | init_vars = tf.train.list_variables(init_checkpoint) 19 | 20 | assignment_map = collections.OrderedDict() 21 | for x in init_vars: 22 | (name, var) = (x[0], x[1]) 23 | if name not in name_to_variable: 24 | continue 25 | assignment_map[name] = name 26 | initialized_variable_names[name] = 1 27 | initialized_variable_names[name + ":0"] = 1 28 | 29 | return assignment_map, initialized_variable_names 30 | -------------------------------------------------------------------------------- /recommendation/utils/type_declaration.py: -------------------------------------------------------------------------------- 1 | from enum import IntEnum 2 | from dataclasses import dataclass 3 | from typing import Optional, List, Sequence 4 | 5 | 6 | class LinearTerms(IntEnum): 7 | """FwFMs中的线性项""" 8 | LW = 0 9 | FeLV = 1 10 | FiLV = 2 11 | 12 | 13 | class FMType(IntEnum): 14 | """FMs选项""" 15 | FM = 1 16 | FwFM = 2 17 | FEFM = 3 18 | GwPFM = 4 19 | 20 | 21 | @dataclass 22 | class Field: 23 | name: str 24 | dim: int = 4 # embedding维度大小 25 | vocabulary_size: int = 1 # dense类型为1 26 | l2_reg: float = 0. # embedding的正则惩罚 27 | init_mean: float = 0. 
# initializer parameter
28 |     init_std: float = 1.0  # initializer parameter
29 |     group: str = "default"  # which embedding-table group the field belongs to when using multiple embedding tables
30 |
31 |
32 | # PNN
33 | class KernelType(IntEnum):
34 |     """
35 |     0-2 are the different KPNN kernel forms; 3 is the micro net of PIN
36 |     """
37 |     Num = 0
38 |     Vec = 1
39 |     Mat = 2
40 |     Net = 3  # PIN
41 |
42 |
43 | @dataclass
44 | class DINField(Field):
45 |     """
46 |     Field for the DIN family of models
47 |     """
48 |     mini_batch_regularization: bool = False  # whether to use Mini-batch Aware Regularization (takes precedence over l2_reg)
49 |     ids_occurrence: Optional[Sequence[int]] = None  # occurrence counts over all samples for feature IDs 0..(K-1)
-------------------------------------------------------------------------------- /trick/README.md: --------------------------------------------------------------------------------
1 | Files ending in `pt` are the PyTorch implementations; the files without the suffix are the TensorFlow versions.
2 |
3 | - **Variable initialization (initialization), layer-wise learning rates (hierarchical_lr), gradient accumulation (gradient_accumulation)**: [article](https://zhuanlan.zhihu.com/p/553277132)
4 | - **Stochastic Weight Averaging (SWA), Exponential Moving Average (EMA)**: [article](https://zhuanlan.zhihu.com/p/554955968)
5 | - **(unbalance) Loss design and Label Smoothing for class-imbalanced classification**: [article](https://zhuanlan.zhihu.com/p/582312784)
-------------------------------------------------------------------------------- /trick/ema.py: --------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 |
 3 |
 4 | class EMA:
 5 |
 6 |     def __init__(self, global_step: tf.Variable,
 7 |                  decay: float = 0.999):
 8 |
 9 |         ema = tf.train.ExponentialMovingAverage(decay, global_step)
10 |
11 |         vars_list = tf.trainable_variables()
12 |
13 |         # EMA smoothing op
14 |         self.ema_op = ema.apply(vars_list)
15 |
16 |         # replace the raw weights with the EMA-smoothed weights
17 |         self.ema_assign_op = [tf.assign(w, ema.average(w)) for w in vars_list]
18 |
19 |         # temporary storage for the original weights
20 |         backup = [tf.get_variable('ema_backup/' + self._get_var_name(w.name), shape=w.shape, dtype=w.dtype, trainable=False) for w in vars_list]
21 |         self.weight_copy_op = [tf.assign(w1, w2) for w1, w2 in zip(backup, vars_list)]
22 |
23 |         # restore the original weights
24 |         self.weight_restore_op = [tf.assign(w1, w2) for w1, w2 in zip(vars_list, backup)]
25 |
26 |         self.sess = None
27 |
28 |     def _get_var_name(self, name: str):
29 |         if name.endswith(":0"):
30 |             name = name[:-2]
31 |         return name
32 |
33 |     def register(self, sess: tf.Session):
34 |         """No need to create shadow variables manually; ema.apply(vars_list) creates them automatically"""
35 |         self.sess = sess
36 |
37 |     def update(self):
38 |         """EMA smoothing step: update the shadow weights"""
39 |         self.sess.run(self.ema_op)
40 |
41 |     def apply_shadow(self):
42 |         """Use the shadow weights as the model weights, backing up the original weights first"""
43 |         self.sess.run(self.weight_copy_op)
44 |         self.sess.run(self.ema_assign_op)
45 |
46 |     def restore(self):
47 |         """Restore the original model weights"""
48 |         self.sess.run(self.weight_restore_op)
49 |
50 |
51 | def train_with_ema(train_op,
52 |                    sess: tf.Session,
53 |                    iterations: int,
54 |                    valid_steps: int):
55 |     global_step = tf.train.get_or_create_global_step()
56 |
57 |     ema = EMA(global_step, decay=0.999)
58 |     ema.register(sess)
59 |
60 |     for i in range(iterations):
61 |
62 |         # regular training step: update the raw weights
63 |         sess.run(train_op)
64 |
65 |         # update the EMA shadow weights
66 |         ema.update()
67 |
68 |         if (i + 1) % valid_steps == 0:
69 |             # switch to the EMA weights
70 |             ema.apply_shadow()
71 |
72 |             # run validation
73 |             print('do valid')
74 |
75 |             # save the model
76 |             print('save model')
77 |
78 |             # restore the raw weights and resume normal training
79 |             ema.restore()
-------------------------------------------------------------------------------- /trick/ema_pt.py: --------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | from torch.utils.data import DataLoader
 4 |
 5 |
 6 | class EMA:
 7 |     def __init__(self, 
model: nn.Module, 8 | decay: float = 0.999): 9 | self.model = model 10 | self.decay = decay 11 | self.shadow = {} 12 | self.backup = {} 13 | 14 | def register(self): 15 | """创建shadow权重""" 16 | for name, param in self.model.named_parameters(): 17 | if param.requires_grad: 18 | self.shadow[name] = param.data.clone() 19 | 20 | def update(self): 21 | """EMA平滑操作,更新shadow权重""" 22 | for name, param in self.model.named_parameters(): 23 | if param.requires_grad: 24 | assert name in self.shadow 25 | new_average = (1.0 - self.decay) * param.data + self.decay * self.shadow[name] 26 | self.shadow[name] = new_average.clone() 27 | 28 | def apply_shadow(self): 29 | """使用shadow权重作为模型权重,并创建原模型权重备份""" 30 | for name, param in self.model.named_parameters(): 31 | if param.requires_grad: 32 | assert name in self.shadow 33 | self.backup[name] = param.data 34 | param.data = self.shadow[name] 35 | 36 | def restore(self): 37 | """恢复模型权重""" 38 | for name, param in self.model.named_parameters(): 39 | if param.requires_grad: 40 | assert name in self.backup 41 | param.data = self.backup[name] 42 | self.backup = {} 43 | 44 | 45 | def train_with_ema(model: nn.Module, 46 | dataloader: DataLoader, 47 | valid_steps: int, 48 | optimizer: torch.optim.Optimizer): 49 | # 初始化EMA 50 | ema = EMA(model, 0.999) 51 | ema.register() 52 | 53 | for i, data in enumerate(dataloader): 54 | # 正常的训练代码 55 | model.train() 56 | loss = model(data) 57 | loss.backward() 58 | 59 | optimizer.step() 60 | optimizer.zero_grad() 61 | 62 | # 更新ema权重 63 | ema.update() 64 | 65 | # 验证&保存模型 66 | if (i + 1) % valid_steps == 0: 67 | # 使用ema权重 68 | ema.apply_shadow() 69 | 70 | # 验证工作 71 | print('do valid') 72 | 73 | # 保存模型工作 74 | print('save model') 75 | 76 | # 恢复原模型权重,继续正常的训练 77 | ema.restore() 78 | -------------------------------------------------------------------------------- /trick/gradient_accumulation.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | """ 4 | steps_accumulate为梯度累积的步数,即累积`steps_accumulate`再进行一次反向传播更新参数 5 | 实现`steps_accumulate * bs`的大批次训练 6 | """ 7 | 8 | 9 | def create_train_op(loss: tf.Tensor, 10 | global_step: tf.Tensor, 11 | steps_accumulate: int): 12 | opt = tf.train.AdamOptimizer(0.01) 13 | 14 | tvs = tf.trainable_variables() 15 | 16 | # 创建梯度变量副本,用于累积梯度 17 | accum_vars = [tf.Variable(tf.zeros_like(tv.initialized_value()), trainable=False) for tv in tvs] 18 | # 清空梯度变量副本 19 | zero_ops = [tv.assign(tf.zeros_like(tv)) for tv in accum_vars] 20 | 21 | # 计算当前批次梯度 22 | gvs = opt.compute_gradients(loss / steps_accumulate, tvs) 23 | 24 | # 将当前批次的梯度累加到`accum_vars` 25 | accum_ops = [accum_vars[i].assign_add(gv[0]) for i, gv in enumerate(gvs)] 26 | 27 | # 使用累积的梯度,进行反向传播更新参数 28 | train_op = opt.apply_gradients([(accum_vars[i], gv[1]) for i, gv in enumerate(gvs)], 29 | global_step=global_step) 30 | 31 | return train_op, accum_ops, zero_ops 32 | 33 | 34 | def train(loss: tf.Tensor, steps_accumulate: int): 35 | global_step = tf.train.get_or_create_global_step() 36 | train_op, accum_ops, zero_ops = create_train_op(loss, global_step, steps_accumulate) 37 | 38 | with tf.Session() as sess: 39 | sess.run(tf.global_variables_initializer()) 40 | 41 | for i in range(10000): 42 | # 这里是模拟使用tf.data.Dataset定义输入流 43 | # 如果是使用placeholder的方式,则需喂入feed_dict数据 44 | sess.run(accum_ops) 45 | 46 | if (i + 1) % steps_accumulate == 0: 47 | sess.run(train_op) 48 | 49 | sess.run(zero_ops) 50 | -------------------------------------------------------------------------------- /trick/gradient_accumulation_pt.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.utils.data import DataLoader 4 | 5 | 6 | # steps_accumulate为梯度累积的步数,即累积`steps_accumulate`再进行一次反向传播更新参数 7 | # 实现`steps_accumulate * bs`的大批次训练 8 | def train(model: nn.Module, 9 | dataloader: DataLoader, 10 | optimizer: torch.optim.Optimizer, 11 | steps_accumulate: int): 12 | 13 | model.zero_grad() 14 | model.train() 15 | 16 | for i, data in enumerate(dataloader): 17 | 18 | loss = model(data) / steps_accumulate 19 | loss.backward() 20 | 21 | if (i + 1) % steps_accumulate == 0: 22 | optimizer.step() 23 | optimizer.zero_grad() 24 | -------------------------------------------------------------------------------- /trick/hierarchical_lr.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | lr_dict = {'bert': 1e-5, 4 | 'default': 1e-3} 5 | 6 | 7 | def create_train_op(loss: tf.Tensor, global_step: tf.Tensor): 8 | optimizer_dict = {} 9 | for key in lr_dict: 10 | # 这里可以选择其他的优化器 11 | optimizer_dict[key] = tf.train.AdamOptimizer(learning_rate=lr_dict[key]) 12 | 13 | # 这里计算梯度与学习率无关, 选择任一optimizer即可 14 | gradients = optimizer_dict['default'].compute_gradients(loss) 15 | 16 | vars_dict = {k: [] for k in lr_dict} 17 | for grad, var in gradients: 18 | layer = 'default' # 默认归属层 19 | for key in lr_dict: 20 | if key in var.name: 21 | layer = key 22 | break 23 | vars_dict[layer].append((grad, var)) 24 | 25 | train_op_list = [] 26 | for key, var in vars_dict.items(): 27 | # 在这里根据不同的学习率进行反向传播,更新参数 28 | # global_step参数None,代表global_step不变 29 | train_op_list.append(optimizer_dict[key].apply_gradients(vars_dict[key], global_step=None)) 30 | 31 | # global_step在这里+1 32 | new_global_step = global_step + 1 33 | train_op_list.append(global_step.assign(new_global_step)) 34 | train_op = tf.group(*train_op_list) 35 | 36 | return train_op 37 | -------------------------------------------------------------------------------- /trick/hierarchical_lr_pt.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from transformers import AdamW 3 | 4 | 5 | lr_dict = { 6 | 'bert': {'lr': 1e-5, 'weight_decay': 0.02, 'eps': 1e-6}, 7 | 'default': {'lr': 1e-3, 'weight_decay': 0.01, 'eps': 1e-6}, 8 | } 9 | 10 | 11 | def create_optimizer(model: nn.Module): 12 | # Set learning_rates for each layers 13 | no_decay = ["bias", "LayerNorm.weight"] 14 | optimizer_grouped_parameters_decay = [] 15 | optimizer_grouped_parameters_no_decay = [] 16 | group_id = {} 17 | 18 | for i, key in enumerate(lr_dict): 19 | optimizer_grouped_parameters_decay.append({'params': [], 20 | 'weight_decay': lr_dict[key]['weight_decay'], 21 | 'lr': lr_dict[key]['lr'], 22 | 'eps': lr_dict[key]['eps']}) 23 | optimizer_grouped_parameters_no_decay.append({'params': [], 24 | 'weight_decay': 0.0, 25 | 'lr': lr_dict[key]['lr'], 26 | 'eps': lr_dict[key]['eps']}) 27 | group_id[key] = i 28 | 29 | for n, p in model.named_parameters(): 30 | index = group_id['default'] 31 | for key in lr_dict: 32 | if key in n: 33 | index = group_id[key] 34 | break 35 | 36 | if any(nd in n for nd in no_decay): 37 | optimizer_grouped_parameters_no_decay[index]['params'].append(p) 38 | else: 39 | optimizer_grouped_parameters_decay[index]['params'].append(p) 40 | 41 | optimizer = AdamW( 42 | optimizer_grouped_parameters_decay + optimizer_grouped_parameters_no_decay, 43 | lr=lr_dict['default']['lr'], 44 | eps=lr_dict['default']['eps'], 45 | ) 46 | 
47 | return optimizer 48 | -------------------------------------------------------------------------------- /trick/initialization.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | # 初始化参数 5 | w = tf.get_variable('w', initializer=tf.truncated_normal(shape=[128, 256], 6 | mean=0.0, 7 | stddev=1.0)) 8 | """ 9 | 截断正态分布:tf.truncated_normal 10 | 普通的正态分布:tf.random_normal 11 | 均匀分布:tf.random_uniform 12 | """ 13 | 14 | # 全连接中的参数初始化 15 | output = tf.layers.dense(w, 128, kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=1.0)) 16 | """ 17 | tf.random_normal_initializer() 18 | tf.glorot_normal_initializer() 19 | tf.initializers.he_normal() 20 | tf.initializers.lecun_normal() 21 | 22 | 还有,以上对应的均匀分布。如 random_uniform_initializer 23 | """ 24 | 25 | 26 | import math 27 | import torch 28 | import torch.nn as nn 29 | 30 | 31 | def _no_grad_trunc_normal_(tensor, mean, std, a, b): 32 | # Cut & paste from PyTorch official master until it's in a few official releases - RW 33 | # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf 34 | def norm_cdf(x): 35 | # Computes standard normal cumulative distribution function 36 | return (1. + math.erf(x / math.sqrt(2.))) / 2. 37 | 38 | if (mean < a - 2 * std) or (mean > b + 2 * std): 39 | print("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. " 40 | "The distribution of values may be incorrect.") 41 | 42 | with torch.no_grad(): 43 | # Values are generated by using a truncated uniform distribution and 44 | # then using the inverse CDF for the normal distribution. 45 | # Get upper and lower cdf values 46 | l = norm_cdf((a - mean) / std) 47 | u = norm_cdf((b - mean) / std) 48 | 49 | # Uniformly fill tensor with values from [l, u], then translate to 50 | # [2l-1, 2u-1]. 
 51 |         tensor.uniform_(2 * l - 1, 2 * u - 1)
 52 |
 53 |         # Use inverse cdf transform for normal distribution to get truncated
 54 |         # standard normal
 55 |         tensor.erfinv_()
 56 |
 57 |         # Transform to proper mean, std
 58 |         tensor.mul_(std * math.sqrt(2.))
 59 |         tensor.add_(mean)
 60 |
 61 |         # Clamp to ensure it's in the proper range
 62 |         tensor.clamp_(min=a, max=b)
 63 |         return tensor
 64 |
 65 |
 66 | def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.):
 67 |     # type: (Tensor, float, float, float, float) -> Tensor
 68 |     return _no_grad_trunc_normal_(tensor, mean, std, a, b)
 69 |
 70 |
 71 | class MyModel(nn.Module):
 72 |     def __init__(self):
 73 |         super(MyModel, self).__init__()
 74 |
 75 |         weight = nn.Parameter(torch.zeros(128, 256))
 76 |
 77 |         linear = nn.Linear(256, 128)
 78 |
 79 |         norm = nn.LayerNorm(256)
 80 |
 81 |         # initialize the nn.Parameter
 82 |         trunc_normal_(weight, std=.02)
 83 |         # initialize the Linear and LayerNorm layers
 84 |         self.apply(self._init_weights)
 85 |
 86 |     def _init_weights(self, m):
 87 |         if isinstance(m, nn.Linear):
 88 |             trunc_normal_(m.weight, std=.02)
 89 |             if isinstance(m, nn.Linear) and m.bias is not None:
 90 |                 nn.init.constant_(m.bias, 0)
 91 |         elif isinstance(m, nn.LayerNorm):
 92 |             nn.init.constant_(m.bias, 0)
 93 |             nn.init.constant_(m.weight, 1.0)
 94 |
 95 |     def forward(self, x):
 96 |         pass
 97 |
-------------------------------------------------------------------------------- /trick/swa.py: --------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import numpy as np
 3 |
 4 |
 5 | def apply_swa(checkpoint_list: list,
 6 |               weight_list: list,
 7 |               save_path: str,
 8 |               sess: tf.Session = None,
 9 |               strict: bool = True):
10 |     """
11 |
12 |     :param checkpoint_list: list of checkpoint paths to average (SWA)
13 |     :param weight_list: weight assigned to each checkpoint
14 |     :param save_path: export path for the averaged (SWA) model
15 |     :param sess:
16 |     :param strict: whether every model variable must be found in the checkpoints
17 |     :return:
18 |     """
19 |     vars_list = tf.trainable_variables()
20 |     saver = tf.train.Saver(var_list=vars_list)
21 |
22 |     swa_op = []
23 |     for var in vars_list:
24 |         temp = []
25 |         try:
26 |             temp = [tf.train.load_variable(path, var.name) * w for path, w in zip(checkpoint_list, weight_list)]
27 |         except tf.errors.NotFoundError:
28 |             print(f"checkpoint doesn't match the model, var: '{var.name}' not in checkpoint")
29 |             if strict:
30 |                 raise
31 |             continue  # skip this variable instead of assigning zeros to it
32 |
33 |         swa_op.append(tf.assign(var, np.sum(temp, axis=0)))
34 |
35 |     if sess is None:
36 |         sess = tf.Session()
37 |     with sess.as_default() as sess:
38 |         sess.run(swa_op)
39 |         saver.save(sess, save_path)
40 |
41 |
42 | if __name__ == '__main__':
43 |     # test program
44 |     from nlp.sentence_bert.bert import BertConfig, BertModel
45 |     model = BertModel(config=BertConfig.from_json_file('chinese_roberta_wwm_ext_L-12_H-768_A-12/bert_config.json'),
46 |                       is_training=False,
47 |                       input_ids=tf.placeholder(tf.int32, [None, 128]))
48 |     apply_swa(checkpoint_list=['chinese_roberta_wwm_ext_L-12_H-768_A-12/bert_model.ckpt',
49 |                                'chinese_macbert_base/chinese_macbert_base.ckpt'],
50 |               weight_list=[0.5, 0.5],
51 |               save_path='bert_swa.ckpt')
52 |
-------------------------------------------------------------------------------- /trick/swa_pt.py: --------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 |
 4 |
 5 | def apply_swa(model: nn.Module,
 6 |               checkpoint_list: list,
 7 |               weight_list: list,
 8 |               strict: bool = True):
 9 |     """
10 |
11 |     :param model:
12 |     :param checkpoint_list: list of checkpoint paths to average (SWA)
13 |     :param weight_list: weight assigned to each checkpoint
14 |     :param strict: whether the model parameters must fully match the checkpoints
15 |     :return:
16 |     """
17 |
18 |     checkpoint_tensor_list = [torch.load(f, map_location='cpu') for f in checkpoint_list]
19 |
20 |     for name, param in model.named_parameters():
21 |         try:
22 |             param.data = sum([ckpt['model'][name] * w for ckpt, w in zip(checkpoint_tensor_list, weight_list)])
23 |         except KeyError:
24 |             if strict:
25 |                 raise KeyError(f"Can't match '{name}' from checkpoint")
26 |             else:
27 |                 print(f"Can't match '{name}' from checkpoint")
28 |
29 |     return model
30 |
-------------------------------------------------------------------------------- /triton/client/_grpc.py: --------------------------------------------------------------------------------
 1 | import numpy as np
 2 |
 3 | import tritonclient.grpc as grpcclient
 4 |
 5 |
 6 | def client_init(url="localhost:8001",
 7 |                 ssl=False, private_key=None, root_certificates=None, certificate_chain=None,
 8 |                 verbose=False):
 9 |     """
10 |
11 |     :param url:
12 |     :param ssl: Enable SSL encrypted channel to the server
13 |     :param private_key: File holding PEM-encoded private key
14 |     :param root_certificates: File holding PEM-encoded root certificates
15 |     :param certificate_chain: File holding PEM-encoded certificate chain
16 |     :param verbose:
17 |     :return:
18 |     """
19 |     triton_client = grpcclient.InferenceServerClient(
20 |         url=url,
21 |         verbose=verbose,
22 |         ssl=ssl,
23 |         root_certificates=root_certificates,
24 |         private_key=private_key,
25 |         certificate_chain=certificate_chain)
26 |
27 |     return triton_client
28 |
29 |
30 | def infer(triton_client, model_name,
31 |           input0='INPUT0', input1='INPUT1',
32 |           output0='OUTPUT0', output1='OUTPUT1',
33 |           compression_algorithm=None):
34 |     inputs = []
35 |     outputs = []
36 |     # batch_size=8
37 |     # infer will raise an error if batch_size exceeds the config's max_batch_size
38 |     # INPUT0/INPUT1 are the input node names from the config file
39 |     inputs.append(grpcclient.InferInput(input0, [8, 2], "FP32"))
40 |     inputs.append(grpcclient.InferInput(input1, [8, 2], "INT32"))
41 |
42 |     # Initialize the data
43 |     # np.random.seed(2022)
44 |     inputs[0].set_data_from_numpy(np.random.random([8, 2]).astype(np.float32))
45 |     # np.random.seed(2022)
46 |     inputs[1].set_data_from_numpy(np.random.randint(0, 20, [8, 2]).astype(np.int32))
47 |
48 |     # OUTPUT0/OUTPUT1 are the output node names from the config file
49 |     outputs.append(grpcclient.InferRequestedOutput(output0))
50 |     outputs.append(grpcclient.InferRequestedOutput(output1))
51 |
52 |     results = triton_client.infer(
53 |         model_name=model_name,
54 |         inputs=inputs,
55 |         outputs=outputs,
56 |         compression_algorithm=compression_algorithm
57 |         # client_timeout=0.1
58 |     )
59 |     print(results)
60 |     # convert to numpy format
61 |     print(results.as_numpy(output0))
62 |     print(results.as_numpy(output1))
63 |
64 |
65 | if __name__ == '__main__':
66 |     import time
67 |
68 |     client = client_init()
69 |
70 |     s = time.time()
71 |
72 |     infer(triton_client=client, model_name='tf_savemodel')
73 |     #
74 |     # infer(triton_client=client, model_name='tf_graphdef')
75 |     #
76 |     # infer(triton_client=client, model_name='torch_model',
77 |     #       input0='INPUT__0', input1='INPUT__1',
78 |     #       output0='OUTPUT__0', output1='OUTPUT__1')
79 |     #
80 |     # infer(triton_client=client, model_name='tf_onnx',
81 |     #       input0='INPUT:0', input1='INPUT:1')
82 |
83 |     # infer(triton_client=client, model_name='torch_onnx')
84 |
85 |     # infer(triton_client=client, model_name='trt_model')
86 |
87 |     print("grpc infer: {}".format(time.time() - s))
88 |
-------------------------------------------------------------------------------- /triton/client/_http.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | https://github.com/triton-inference-server/client/tree/main/src/python/examples 3 | """ 4 | 5 | import gevent.ssl 6 | import numpy as np 7 | import tritonclient.http as httpclient 8 | 9 | 10 | def client_init(url="localhost:8000", 11 | ssl=False, key_file=None, cert_file=None, ca_certs=None, insecure=False, 12 | verbose=False): 13 | """ 14 | 15 | :param url: 16 | :param ssl: Enable encrypted link to the server using HTTPS 17 | :param key_file: File holding client private key 18 | :param cert_file: File holding client certificate 19 | :param ca_certs: File holding ca certificate 20 | :param insecure: Use no peer verification in SSL communications. Use with caution 21 | :param verbose: Enable verbose output 22 | :return: 23 | """ 24 | if ssl: 25 | ssl_options = {} 26 | if key_file is not None: 27 | ssl_options['keyfile'] = key_file 28 | if cert_file is not None: 29 | ssl_options['certfile'] = cert_file 30 | if ca_certs is not None: 31 | ssl_options['ca_certs'] = ca_certs 32 | ssl_context_factory = None 33 | if insecure: 34 | ssl_context_factory = gevent.ssl._create_unverified_context 35 | triton_client = httpclient.InferenceServerClient( 36 | url=url, 37 | verbose=verbose, 38 | ssl=True, 39 | ssl_options=ssl_options, 40 | insecure=insecure, 41 | ssl_context_factory=ssl_context_factory) 42 | else: 43 | triton_client = httpclient.InferenceServerClient( 44 | url=url, verbose=verbose) 45 | 46 | return triton_client 47 | 48 | 49 | def infer(triton_client, model_name, 50 | input0='INPUT0', input1='INPUT1', 51 | output0='OUTPUT0', output1='OUTPUT1', 52 | request_compression_algorithm=None, 53 | response_compression_algorithm=None): 54 | """ 55 | 56 | :param triton_client: 57 | :param model_name: 58 | :param input0: 59 | :param input1: 60 | :param output0: 61 | :param output1: 62 | :param request_compression_algorithm: Optional HTTP compression algorithm to use for the request body on client side. 63 | Currently supports "deflate", "gzip" and None. By default, no compression is used. 
 64 |     :param response_compression_algorithm:
 65 |     :return:
 66 |     """
 67 |     inputs = []
 68 |     outputs = []
 69 |     # batch_size=8
 70 |     # infer will raise an error if batch_size exceeds the config's max_batch_size
 71 |     # INPUT0/INPUT1 are the input node names from the config file
 72 |     inputs.append(httpclient.InferInput(input0, [8, 2], "FP32"))
 73 |     inputs.append(httpclient.InferInput(input1, [8, 2], "INT32"))
 74 |
 75 |     # Initialize the data
 76 |     # np.random.seed(2022)
 77 |     inputs[0].set_data_from_numpy(np.random.random([8, 2]).astype(np.float32), binary_data=False)
 78 |     # np.random.seed(2022)
 79 |     inputs[1].set_data_from_numpy(np.random.randint(0, 20, [8, 2]).astype(np.int32), binary_data=False)
 80 |
 81 |     # OUTPUT0/OUTPUT1 are the output node names from the config file
 82 |     outputs.append(httpclient.InferRequestedOutput(output0, binary_data=False))
 83 |     outputs.append(httpclient.InferRequestedOutput(output1,
 84 |                                                    binary_data=False))
 85 |     query_params = {'test_1': 1, 'test_2': 2}  # URL query parameters for the request
 86 |     results = triton_client.infer(
 87 |         model_name=model_name,
 88 |         inputs=inputs,
 89 |         outputs=outputs,
 90 |         request_compression_algorithm=request_compression_algorithm,
 91 |         response_compression_algorithm=response_compression_algorithm, query_params=query_params)
 92 |     print(results)
 93 |     # convert to numpy format
 94 |     print(results.as_numpy(output0))
 95 |     print(results.as_numpy(output1))
 96 |
 97 |
 98 | if __name__ == '__main__':
 99 |     import time
100 |
101 |     client = client_init()
102 |
103 |     s = time.time()
104 |
105 |     infer(triton_client=client, model_name='tf_savemodel')
106 |     #
107 |     # infer(triton_client=client, model_name='tf_graphdef')
108 |     #
109 |     # infer(triton_client=client, model_name='torch_model',
110 |     #       input0='INPUT__0', input1='INPUT__1',
111 |     #       output0='OUTPUT__0', output1='OUTPUT__1')
112 |     #
113 |     # infer(triton_client=client, model_name='tf_onnx',
114 |     #       input0='INPUT:0', input1='INPUT:1')
115 |
116 |     # infer(triton_client=client, model_name='torch_onnx')
117 |
118 |     # infer(triton_client=client, model_name='trt_model')
119 |
120 |     print(time.time() - s)
121 |
-------------------------------------------------------------------------------- /triton/gen_model/onnx_model.py: --------------------------------------------------------------------------------
 1 | """
 2 | Exporting To ONNX From TensorFlow
 3 |
 4 | pip install -U tf2onnx
 5 |
 6 | # savedmodel
 7 | python -m tf2onnx.convert --saved-model tensorflow-model-path --output model.onnx
 8 |
 9 | # checkpoint
10 | python -m tf2onnx.convert --checkpoint tensorflow-model-meta-file-path --output model.onnx --inputs input0:0,input1:0 --outputs output0:0
11 |
12 | # graphdef
13 | python -m tf2onnx.convert --graphdef tensorflow-model-graphdef-file --output model.onnx --inputs input0:0,input1:0 --outputs output0:0
14 | """
15 |
16 | import os
17 | import torch
18 | import torch.onnx
19 |
20 | from torch_model import MyNet
21 |
22 |
23 | def torch2onnx(model_version_dir, max_batch):
24 |     # define the example input formats
25 |     example_input0 = torch.zeros([max_batch, 2], dtype=torch.float32)
26 |     example_input1 = torch.zeros([max_batch, 2], dtype=torch.int32)
27 |
28 |     my_model = MyNet()
29 |
30 |     try:
31 |         os.makedirs(model_version_dir)
32 |     except OSError as ex:
33 |         pass  # ignore existing dir
34 |
35 |     torch.onnx.export(my_model,
36 |                       (example_input0, example_input1),
37 |                       os.path.join(model_version_dir, 'model.onnx'),
38 |                       # input node names
39 |                       input_names=("INPUT0", "INPUT1"),
40 |                       # output node names
41 |                       output_names=("OUTPUT0", "OUTPUT1"),
42 |                       # mark the batch_size dimension as dynamic
43 |                       dynamic_axes={"INPUT0": [0], "INPUT1": [0], "OUTPUT0": [0], "OUTPUT1": [0]},
44 |                       verbose=True)
45 |
46 |
47 | def create_modelconfig(models_dir, max_batch, 
version_policy=None): 48 | model_name = os.path.basename(models_dir) 49 | config_dir = models_dir 50 | 51 | # version policy 52 | version_policy_str = "{ latest { num_versions: 1 }}" 53 | if version_policy is not None: 54 | type, val = version_policy 55 | if type == 'latest': 56 | version_policy_str = "{{ latest {{ num_versions: {} }}}}".format( 57 | val) 58 | elif type == 'specific': 59 | version_policy_str = "{{ specific {{ versions: {} }}}}".format(val) 60 | else: 61 | version_policy_str = "{ all { }}" 62 | 63 | # 这里的 name、data_type、dims 是根据以上模型的输入&输出节点配置 64 | # dims不包含batch_size维度 65 | config = ''' 66 | name: "{}" 67 | platform: "onnxruntime_onnx" 68 | max_batch_size: {} 69 | version_policy: {} 70 | input [ 71 | {{ 72 | name: "INPUT0" 73 | data_type: TYPE_FP32 74 | dims: [ 2 ] 75 | }}, 76 | {{ 77 | name: "INPUT1" 78 | data_type: TYPE_INT32 79 | dims: [ 2 ] 80 | }} 81 | ] 82 | output [ 83 | {{ 84 | name: "OUTPUT0" 85 | data_type: TYPE_FP32 86 | dims: [ 2 ] 87 | }}, 88 | {{ 89 | name: "OUTPUT1" 90 | data_type: TYPE_FP32 91 | dims: [ 2,10 ] 92 | }} 93 | ] 94 | '''.format(model_name, max_batch, version_policy_str) 95 | 96 | try: 97 | os.makedirs(config_dir) 98 | except OSError as ex: 99 | pass # ignore existing dir 100 | 101 | with open(config_dir + "/config.pbtxt", "w") as cfile: 102 | cfile.write(config) 103 | 104 | 105 | if __name__ == '__main__': 106 | max_batch = 8 107 | model_version_dir = '/Users/hong/Desktop/server/docs/examples/model_repository/torch_onnx/1' 108 | torch2onnx(model_version_dir=model_version_dir, 109 | max_batch=max_batch) 110 | create_modelconfig(models_dir=os.path.dirname(model_version_dir), 111 | max_batch=max_batch) 112 | -------------------------------------------------------------------------------- /triton/gen_model/tensorrt_model.py: -------------------------------------------------------------------------------- 1 | import tensorrt as trt 2 | import os 3 | 4 | 5 | def onnx2trt(model_version_dir, onnx_model_file, max_batch): 6 | logger = trt.Logger(trt.Logger.WARNING) 7 | 8 | builder = trt.Builder(logger) 9 | 10 | # The EXPLICIT_BATCH flag is required in order to import models using the ONNX parser 11 | network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) 12 | 13 | parser = trt.OnnxParser(network, logger) 14 | 15 | success = parser.parse_from_file(onnx_model_file) 16 | for idx in range(parser.num_errors): 17 | print(parser.get_error(idx)) 18 | 19 | if not success: 20 | pass # Error handling code here 21 | 22 | profile = builder.create_optimization_profile() 23 | # INPUT0可以接收[1, 2] -> [max_batch, 2]的维度 24 | profile.set_shape("INPUT0", [1, 2], [1, 2], [max_batch, 2]) 25 | profile.set_shape("INPUT1", [1, 2], [1, 2], [max_batch, 2]) 26 | 27 | config = builder.create_builder_config() 28 | config.add_optimization_profile(profile) 29 | 30 | # tensorrt 8.x 31 | # config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20) # 1 MiB 32 | 33 | # tensorrt 7.x 34 | config.max_workspace_size = 1 << 20 35 | 36 | try: 37 | engine_bytes = builder.build_serialized_network(network, config) 38 | except AttributeError: 39 | engine = builder.build_engine(network, config) 40 | engine_bytes = engine.serialize() 41 | del engine 42 | 43 | with open(os.path.join(model_version_dir, 'model.plan'), "wb") as f: 44 | f.write(engine_bytes) 45 | 46 | 47 | def create_modelconfig(models_dir, max_batch, version_policy=None): 48 | model_name = os.path.basename(models_dir) 49 | config_dir = models_dir 50 | 51 | # version policy 52 | 
version_policy_str = "{ latest { num_versions: 1 }}" 53 | if version_policy is not None: 54 | type, val = version_policy 55 | if type == 'latest': 56 | version_policy_str = "{{ latest {{ num_versions: {} }}}}".format( 57 | val) 58 | elif type == 'specific': 59 | version_policy_str = "{{ specific {{ versions: {} }}}}".format(val) 60 | else: 61 | version_policy_str = "{ all { }}" 62 | 63 | # 这里的 name、data_type、dims 是根据以上模型的输入&输出节点配置 64 | # dims不包含batch_size维度 65 | config = ''' 66 | name: "{}" 67 | platform: "tensorrt_plan" 68 | max_batch_size: {} 69 | version_policy: {} 70 | input [ 71 | {{ 72 | name: "INPUT0" 73 | data_type: TYPE_FP32 74 | dims: [ 2 ] 75 | }}, 76 | {{ 77 | name: "INPUT1" 78 | data_type: TYPE_INT32 79 | dims: [ 2 ] 80 | }} 81 | ] 82 | output [ 83 | {{ 84 | name: "OUTPUT0" 85 | data_type: TYPE_FP32 86 | dims: [ 2 ] 87 | }}, 88 | {{ 89 | name: "OUTPUT1" 90 | data_type: TYPE_FP32 91 | dims: [ 2,10 ] 92 | }} 93 | ] 94 | '''.format(model_name, max_batch, version_policy_str) 95 | 96 | try: 97 | os.makedirs(config_dir) 98 | except OSError as ex: 99 | pass # ignore existing dir 100 | 101 | with open(config_dir + "/config.pbtxt", "w") as cfile: 102 | cfile.write(config) 103 | 104 | 105 | if __name__ == '__main__': 106 | max_batch = 8 107 | model_version_dir = '/Users/hong/Desktop/server/docs/examples/model_repository/torch_onnx/1' 108 | onnx2trt(model_version_dir=model_version_dir, 109 | onnx_model_file='model.onnx', 110 | max_batch=max_batch) 111 | create_modelconfig(models_dir=os.path.dirname(model_version_dir), 112 | max_batch=max_batch) 113 | -------------------------------------------------------------------------------- /triton/gen_model/torch_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch import nn 4 | 5 | 6 | class MyNet(nn.Module): 7 | 8 | def __init__(self): 9 | super(MyNet, self).__init__() 10 | 11 | self.embedding = nn.Embedding(num_embeddings=100, 12 | embedding_dim=10) 13 | 14 | def forward(self, input0, input1): 15 | # tf.add(tf.multiply(x1, 0.5), 2) 16 | output0 = torch.add(torch.multiply(input0, 0.5), 2) 17 | 18 | output1 = self.embedding(input1) 19 | 20 | return output0, output1 21 | 22 | 23 | def create_modelfile(model_version_dir, max_batch, 24 | version_policy=None): 25 | # your model net 26 | 27 | # 定义输入的格式 28 | example_input0 = torch.zeros([2], dtype=torch.float32) 29 | example_input1 = torch.zeros([2], dtype=torch.int32) 30 | 31 | my_model = MyNet() 32 | 33 | traced = torch.jit.trace(my_model, (example_input0, example_input1)) 34 | 35 | try: 36 | os.makedirs(model_version_dir) 37 | except OSError as ex: 38 | pass # ignore existing dir 39 | 40 | traced.save(model_version_dir + "/model.pt") 41 | 42 | create_modelconfig(models_dir=os.path.dirname(model_version_dir), 43 | max_batch=max_batch, 44 | version_policy=version_policy) 45 | 46 | 47 | def create_modelconfig(models_dir, max_batch, version_policy=None): 48 | model_name = os.path.basename(models_dir) 49 | config_dir = models_dir 50 | 51 | # version policy 52 | version_policy_str = "{ latest { num_versions: 1 }}" 53 | if version_policy is not None: 54 | type, val = version_policy 55 | if type == 'latest': 56 | version_policy_str = "{{ latest {{ num_versions: {} }}}}".format( 57 | val) 58 | elif type == 'specific': 59 | version_policy_str = "{{ specific {{ versions: {} }}}}".format(val) 60 | else: 61 | version_policy_str = "{ all { }}" 62 | 63 | # 这里的 name、data_type、dims 是根据以上模型的输入&输出节点配置 64 | # dims不包含batch_size维度 65 | # 
by default, the first input of `forward` is named INPUT__0 and the first returned output is named OUTPUT__0
 66 |     config = '''
 67 | name: "{}"
 68 | platform: "pytorch_libtorch"
 69 | max_batch_size: {}
 70 | version_policy: {}
 71 | input [
 72 |   {{
 73 |     name: "INPUT__0"
 74 |     data_type: TYPE_FP32
 75 |     dims: [ 2 ]
 76 |   }},
 77 |   {{
 78 |     name: "INPUT__1"
 79 |     data_type: TYPE_INT32
 80 |     dims: [ 2 ]
 81 |   }}
 82 | ]
 83 | output [
 84 |   {{
 85 |     name: "OUTPUT__0"
 86 |     data_type: TYPE_FP32
 87 |     dims: [ 2 ]
 88 |   }},
 89 |   {{
 90 |     name: "OUTPUT__1"
 91 |     data_type: TYPE_FP32
 92 |     dims: [ 2,10 ]
 93 |   }}
 94 | ]
 95 | '''.format(model_name, max_batch, version_policy_str)
 96 |
 97 |     try:
 98 |         os.makedirs(config_dir)
 99 |     except OSError as ex:
100 |         pass  # ignore existing dir
101 |
102 |     with open(config_dir + "/config.pbtxt", "w") as cfile:
103 |         cfile.write(config)
104 |
105 |
106 | if __name__ == '__main__':
107 |     create_modelfile(
108 |         model_version_dir="/Users/hong/Desktop/server/docs/examples/model_repository/torch_model/1",
109 |         max_batch=8,
110 |         # model_version_dir="/Users/hong/Desktop/server/docs/examples/model_repository/tf_savemodel/1",
111 |         # save_type="savemodel"
112 |     )
113 |
--------------------------------------------------------------------------------
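Before pointing Triton at the model repository, the traced model can be sanity-checked locally. A small sketch (the path is the hypothetical one from the `__main__` block above, and the int32 indices mirror the dtypes used for tracing):

import torch

loaded = torch.jit.load("/Users/hong/Desktop/server/docs/examples/model_repository/torch_model/1/model.pt")
out0, out1 = loaded(torch.rand(8, 2),
                    torch.randint(0, 20, (8, 2), dtype=torch.int32))
print(out0.shape, out1.shape)  # expected: torch.Size([8, 2]) torch.Size([8, 2, 10])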