├── .idea
│   ├── .gitignore
│   ├── CTR_Function.iml
│   ├── inspectionProfiles
│   │   └── profiles_settings.xml
│   ├── misc.xml
│   ├── modules.xml
│   └── vcs.xml
├── LICENSE
├── README.md
├── data
│   └── README.md
├── example
│   └── ctr_example
│       ├── dmin_seq.py
│       ├── gsp_seq.py
│       ├── seq.py
│       ├── session.py
│       ├── session_prepare.py
│       ├── sim_seq.py
│       ├── timeInterval.py
│       └── un_seq.py
├── kon
│   ├── __init__.py
│   ├── model
│   │   ├── __init__.py
│   │   ├── ctr_model
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── layer
│   │   │   │   ├── __init__.py
│   │   │   │   ├── behavior_layer
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── behavior_layer.py
│   │   │   │   │   └── rnn_demo.py
│   │   │   │   ├── core_layer
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── core_layer.py
│   │   │   │   └── interactive_layer
│   │   │   │       ├── __init__.py
│   │   │   │       └── interactive_layer.py
│   │   │   └── model
│   │   │       ├── __init__.py
│   │   │       └── models.py
│   │   ├── cvr_model
│   │   │   ├── __init__.py
│   │   │   ├── layer
│   │   │   │   └── __init__.py
│   │   │   └── model
│   │   │       └── __init__.py
│   │   ├── embedding
│   │   │   ├── .idea
│   │   │   │   ├── embedding.iml
│   │   │   │   ├── encodings.xml
│   │   │   │   ├── misc.xml
│   │   │   │   └── modules.xml
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── logs
│   │   │   │   └── 0
│   │   │   │       ├── best_weights.h5
│   │   │   │       ├── events.out.tfevents.1564644409.dream-System
│   │   │   │       ├── events.out.tfevents.1565180032.dream-System
│   │   │   │       └── events.out.tfevents.1565180080.dream-System
│   │   │   ├── model_test.py
│   │   │   ├── other
│   │   │   │   ├── __init__.py
│   │   │   │   ├── other-collections.py
│   │   │   │   └── other-networks.py
│   │   │   ├── setence_model
│   │   │   │   ├── __init__.py
│   │   │   │   ├── backone_language_model.py
│   │   │   │   ├── backone_optimize.py
│   │   │   │   ├── deepwalk.py
│   │   │   │   ├── line.py
│   │   │   │   ├── logs
│   │   │   │   │   └── 0
│   │   │   │   │       ├── best_weights.h5
│   │   │   │   │       ├── events.out.tfevents.1565011299.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565011324.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565011336.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565013918.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565013943.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565013958.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565013985.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565014029.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565014060.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565014368.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565014404.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565014481.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565014728.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565014760.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565014805.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565015151.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565015263.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565015277.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565015308.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565057550.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565058087.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565058252.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565058261.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565058653.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565058673.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565058702.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565059234.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565059587.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565059681.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565059708.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565059726.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565059768.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565059787.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565060677.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565060761.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565060853.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565069889.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565069922.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565069970.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565070262.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565070318.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565070526.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565070581.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565070607.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565070688.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565070826.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565070867.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565070932.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565070966.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565070986.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565071024.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565162850.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565165341.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565168457.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565170961.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565173560.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565173578.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565173609.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565173761.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565174061.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565174117.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565174191.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565174253.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565174276.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565174293.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565174349.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565174378.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565179687.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565182503.dream-System
│   │   │   │   │       ├── events.out.tfevents.1565182554.dream-System
│   │   │   │   │       └── train
│   │   │   │   │           ├── events.out.tfevents.1577277074.hjq-Precision-T7610.19721.672.v2
│   │   │   │   │           ├── events.out.tfevents.1577277074.hjq-Precision-T7610.profile-empty
│   │   │   │   │           ├── events.out.tfevents.1577277128.hjq-Precision-T7610.20083.106.v2
│   │   │   │   │           ├── events.out.tfevents.1577277719.hjq-Precision-T7610.20872.106.v2
│   │   │   │   │           ├── events.out.tfevents.1577277787.hjq-Precision-T7610.21065.672.v2
│   │   │   │   │           ├── events.out.tfevents.1577278233.hjq-Precision-T7610.21443.672.v2
│   │   │   │   │           ├── events.out.tfevents.1577278349.hjq-Precision-T7610.21613.672.v2
│   │   │   │   │           ├── events.out.tfevents.1577278745.hjq-Precision-T7610.22262.672.v2
│   │   │   │   │           ├── events.out.tfevents.1577279268.hjq-Precision-T7610.22939.672.v2
│   │   │   │   │           ├── events.out.tfevents.1577279585.hjq-Precision-T7610.2711.672.v2
│   │   │   │   │           ├── events.out.tfevents.1577280012.hjq-Precision-T7610.3191.672.v2
│   │   │   │   │           └── plugins
│   │   │   │   │               └── profile
│   │   │   │   │                   ├── 2019-12-25_20-31-14
│   │   │   │   │                   │   └── local.trace
│   │   │   │   │                   ├── 2019-12-25_20-32-08
│   │   │   │   │                   │   └── local.trace
│   │   │   │   │                   ├── 2019-12-25_20-41-59
│   │   │   │   │                   │   └── local.trace
│   │   │   │   │                   ├── 2019-12-25_20-43-07
│   │   │   │   │                   │   └── local.trace
│   │   │   │   │                   ├── 2019-12-25_20-50-33
│   │   │   │   │                   │   └── local.trace
│   │   │   │   │                   ├── 2019-12-25_20-52-29
│   │   │   │   │                   │   └── local.trace
│   │   │   │   │                   ├── 2019-12-25_20-59-05
│   │   │   │   │                   │   └── local.trace
│   │   │   │   │                   ├── 2019-12-25_21-07-48
│   │   │   │   │                   │   └── local.trace
│   │   │   │   │                   ├── 2019-12-25_21-13-05
│   │   │   │   │                   │   └── local.trace
│   │   │   │   │                   └── 2019-12-25_21-20-12
│   │   │   │   │                       └── local.trace
│   │   │   │   ├── node2vec.py
│   │   │   │   ├── sdne.py
│   │   │   │   └── walk_core_model.py
│   │   │   ├── util
│   │   │   │   ├── __init__.py
│   │   │   │   ├── evaluate.py
│   │   │   │   ├── test.txt
│   │   │   │   └── util_tool.py
│   │   │   └── wiki
│   │   │       ├── Wiki_category.txt
│   │   │       ├── Wiki_edgelist.txt
│   │   │       └── Wiki_labels.txt
│   │   └── feature_eng
│   │       ├── __init__.py
│   │       ├── base_model.py
│   │       └── feature_transform.py
│   ├── utils
│   │   ├── __init__.py
│   │   └── data_prepare.py
│   └── wrapper
│       ├── Feature_Columns.py
│       └── __init__.py
└── paper
    ├── Next Read
    │   ├── A Convolutional Click Prediction Model.pdf
    │   ├── [DSSM] Learning Deep Structured Semantic Models for Web Search using Clickthrough Data (UIUC 2013).pdf
    │   ├── [ESMM] Entire Space Multi-Task Model - An Effective Approach for Estimating Post-Click Conversion Rate (Alibaba 2018).pdf
    │   ├── [FAT-DeepFFM]Field Attentive Deep Field-aware Factorization Machine[2019].pdf
    │   ├── [FGCNN]Feature Generation by Convolutional Neural Network forClick-Through Rate Predicti[2019].pdf
    │   ├── [FLEN] Leveraging Field for Scalable CTR Predicti[2019].pdf
    │   ├── [FTRL] Ad Click Prediction a View from the Trenches (Google 2013).pdf
    │   ├── [Fi-GNN]Modeling Feature Interactions via Graph Neural Networks for CTR Prediction[2019].pdf
    │   ├── [FiBiNET]Combining Feature Importance and Bilinear featureInteraction for Click-Through Rate Predict[2019].pdf
    │   ├── [GBDT+LR] Practical Lessons from Predicting Clicks on Ads at Facebook (Facebook 2014).pdf
    │   ├── [Image CTR] Image Matters - Visually modeling user behaviors using Advanced Model Server (Alibaba 2018).pdf
    │   ├── [MINDN]Multi-Interest Network with Dynamic Routing for Recommendation at Tmall[2019].pdf
    │   ├── [OENN]Order-aware Embedding Neural Network for CTR Predicti[2019].pdf
    │   ├── [ONN]Operation-aware Neural Networks for User Response Prediction[2019].pdf
    │   ├── [PS-PLM] Learning Piece-wise Linear Models from Large Scale Data for Ad Click Prediction (Alibaba 2017).pdf
    │   ├── [RLAC]Representation Learning-Assisted Click-Through Rate Prediction[2019].pdf
    │   └── [Warm Up Cold-start Advertisements]Improving CTR Predictions via Learning to Learn ID Embeddings[2019].pdf
    ├── README
    ├── behavior
    │   ├── [ALSH]Asymmetric LSH for Sublinear Time Maximum Inner Product Search[2014].pdf
    │   ├── [BST]Behavior Sequence Transformer for E-commerce Recommendation in Alibaba[2019].pdf
    │   ├── [DIEN] Deep Interest Evolution Network for Click-Through Rate Prediction (Alibaba 2019).pdf
    │   ├── [DIN] Deep Interest Network for Click-Through Rate Prediction (Alibaba 2018).pdf
    │   ├── [DSIN]Deep Session Interest Network for Click-Through Rate Predicti[2019].pdf
    │   ├── [DSTN]Deep Spatio-Temporal Neural Networks for Click-Through Rate Prediction[2019].pdf
    │   ├── [DTSF]Deep Time-Stream Framework for Click-Through Rate Prediction by Tracking Interest Evolution[2020].pdf
    │   ├── [LSM]Lifelong Sequential Modeling with Personalized Memorization for User Response Prediction[2019].pdf
    │   ├── [MIMN]Practice on Long Sequential User Behavior Modeling for Click-Through Rate Prediction[2019].pdf
    │   ├── [NTM]Neural Turing Machines[2014].pdf
    │   ├── [NTM]The_NTM_Introduction_And_Implementation[2017].pdf
    │   ├── [REFORMER] THE EFFICIENT TRANSFORMER[2020].pdf
    │   ├── [SIM]Search-based User Interest Modeling with Lifelong Sequential Behavior Data for Click-Through Rate Prediction[2020].pdf
    │   ├── [Self-Attention]Attention is all you need(Google 2017).pdf
    │   └── [SeqFM]Sequence-Aware Factorization Machines(2019).pdf
    └── interactive
        ├── [AFM] Attentional Factorization Machines - Learning the Weight of Feature Interactions via Attention Networks (ZJU 2017).pdf
        ├── [AutoInt] AutoInt Automatic Feature Interaction Learning via Self-Attentive Neural Networks(CIKM 2019).pdf
        ├── [DCN] Deep & Cross Network for Ad Click Predictions (Stanford 2017).pdf
        ├── [Deep Crossing] Deep Crossing - Web-Scale Modeling without Manually Crafted Combinatorial Features (Microsoft 2016).pdf
        ├── [DeepFM] A Factorization-Machine based Neural Network for CTR Prediction (HIT-Huawei 2017).pdf
        ├── [FFM] Field-aware Factorization Machines for CTR Prediction (Criteo 2016).pdf
        ├── [FM] Fast Context-aware Recommendations with Factorization Machines (UKON 2011).pdf
        ├── [FNN] Deep Learning over Multi-field Categorical Data (UCL 2016).pdf
        ├── [LR] Predicting Clicks - Estimating the Click-Through Rate for New Ads (Microsoft 2007).pdf
        ├── [NFM] Neural Factorization Machines for Sparse Predictive Analytics (NUS 2017).pdf
        ├── [PNN] Product-based Neural Networks for User Response Prediction (SJTU 2016).pdf
        ├── [Wide & Deep] Wide & Deep Learning for Recommender Systems (Google 2016).pdf
        └── [xDeepFM] xDeepFM - Combining Explicit and Implicit Feature Interactions for Recommender Systems (USTC 2018).pdf
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /workspace.xml
3 |
--------------------------------------------------------------------------------
/.idea/CTR_Function.iml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CTR_Function
2 |
3 | 
4 | 
5 | 
6 | 
7 | 
8 | Email:hjq1922451756@gmail.com
9 |
10 | Usage:
11 | pip install Data-Function
12 | from kon. ...
13 |
14 | >1.[code folder](https://github.com/TIXhjq/CTR_Function/tree/master/code) = [nn, feature_eng]
15 | >
16 | >>1) Fresh, ready to use
17 | >>>[[building...] CTR](https://github.com/TIXhjq/CTR_Function/tree/master/kon/model/ctr_model)
18 | >
19 | >>2) Parts awaiting refactoring (legacy code)
20 | >>>[Graph (being refactored)](https://github.com/TIXhjq/CTR_Function/tree/master/kon/model/embedding)
21 | >>>[[building...] Feature Engineering (being refactored in Scala)](https://github.com/TIXhjq/CTR_Function/tree/master/kon/model/feature_eng)
22 | >
23 | >2.[paper](https://github.com/TIXhjq/CTR_Function/tree/master/paper) (nn: only the CTR part for now, the rest will come later ^_^)
24 | >3.[use example](https://github.com/TIXhjq/CTR_Function/tree/master/example) (ctr_example is well covered, the rest maybe...)
25 | >4.[data](https://github.com/TIXhjq/CTR_Function/tree/master/data)
26 |
27 | P.S. Each directory has its own detailed README...
28 |
29 | [Short write-ups of some of the NN models (only a few, admittedly)](https://zhuanlan.zhihu.com/c_1145034612807028736)
30 |
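31 | A minimal end-to-end sketch, condensed from example/ctr_example/seq.py in this repo (the CSV files and column names below are that example's; substitute your own):
32 |
33 | ```python
34 | import pandas as pd
35 | import tensorflow as tf
36 | from kon.model.ctr_model.model.models import *
37 | from kon.utils.data_prepare import data_prepare
38 |
39 | data_pre = data_prepare(batch_size=32)
40 |
41 | # concat train/test so the sparse/sequence encoders share one vocabulary
42 | df, (train_idx, test_idx) = data_pre.concat_test_train(
43 |     pd.read_csv('seq_train.csv'), pd.read_csv('seq_test.csv'))
44 |
45 | seqDf, seqIdx, seqInfo = data_pre.seq_deal(
46 |     df[['buy_list', 'cate_list']], max_len=[90] * 2, embedding_dim=[8] * 2,
47 |     mask_zero=True, is_trainable=True, pre_weight=None, sample_num=5)
48 | sparseDf, sparseInfo = data_pre.sparse_fea_deal(df[['user_id', 'item_id', 'item_cate']])
49 |
50 | train, val = data_pre.extract_train_test(
51 |     targetDf=df[['target']], train_idx=train_idx, test_idx=test_idx,
52 |     sparseDf=sparseDf, seqDf=seqDf)
53 |
54 | model = DIEN(data_pre.FeatureInput(sparseInfo=sparseInfo, seqInfo=seqInfo),
55 |              candidateFea=['item_id', 'item_cate'], behaviorFea=['buy_list', 'cate_list'])
56 | model.compile(loss=tf.losses.binary_crossentropy, optimizer='adam',
57 |               metrics=[tf.keras.metrics.AUC()])
58 | model.fit(train, validation_data=val, epochs=100,
59 |           callbacks=[tf.keras.callbacks.EarlyStopping(patience=10, verbose=5)])
60 | ```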
--------------------------------------------------------------------------------
/data/README.md:
--------------------------------------------------------------------------------
1 | Data URL:
2 | [DataSet](https://www.dropbox.com/s/jjyygph9wm36fmr/dataset.tar.gz?dl=0)
3 | Download and extract it to ./ (this directory).
--------------------------------------------------------------------------------
/example/ctr_example/dmin_seq.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # _*_ coding:utf-8 _*_
4 | '''=================================
5 | @Author :tix_hjq
6 | @Date :2020/11/23 7:15 PM
7 | @File :dmin_seq.py
8 | @email :hjq1922451756@gmail.com or 1922451756@qq.com
9 | ================================='''
10 | from kon.model.ctr_model.model.models import *
11 |
12 | warnings.filterwarnings("ignore")
13 | pd.set_option('display.max_columns', None)
14 | pd.set_option('display.max_rows', None)
15 | pd.set_option('max_colwidth', 100)
16 |
17 | print(os.getcwd())
18 | # ----------------------------------------------------
19 | data_folder = '../../data/'
20 | origin_data_folder = data_folder + 'origin_data/'
21 | submit_data_folder = data_folder + 'submit_data/'
22 | eda_data_folder = data_folder + 'eda_data/'
23 | fea_data_folder = data_folder + 'fea_data/'
24 | # -----------------------------------------------------------------
25 | model_tool = base_model(submit_data_folder)
26 | fea_tool = feature_tool(fea_data_folder)
27 | data_pre = data_prepare(batch_size=32)
28 | # -----------------------------------------------------------------
29 | columns = ["date", "user_id", "price", "ad_id", "cate_id", "target", "day"]
30 |
31 | trainDf = pd.read_csv(origin_data_folder + 'ali_data/train.csv', usecols=columns, nrows=100)
32 | testDf = pd.read_csv(origin_data_folder + 'ali_data/test.csv', usecols=columns, nrows=100)
33 |
34 | df = pd.concat([trainDf, testDf], axis=0)
35 | df["date"] = pd.to_datetime(df.date)
36 | df.sort_values(["date"], inplace=True)
37 | print(df.head())
38 |
--------------------------------------------------------------------------------
/example/ctr_example/gsp_seq.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # _*_ coding:utf-8 _*_
3 | '''=================================
4 | @Author :tix_hjq
5 | @Date :2020/6/27 3:29 PM
6 | @File :gsp_seq.py
7 | @email :hjq1922451756@gmail.com or 1922451756@qq.com
8 | ================================='''
9 | from kon.model.ctr_model.model.models import *
10 | warnings.filterwarnings("ignore")
11 | pd.set_option('display.max_columns', None)
12 | pd.set_option('display.max_rows', None)
13 | pd.set_option('max_colwidth', 100)
14 |
15 | print(os.getcwd())
16 | #----------------------------------------------------
17 | data_folder = '../../data/'
18 | origin_data_folder = data_folder + 'origin_data/'
19 | submit_data_folder = data_folder + 'submit_data/'
20 | eda_data_folder = data_folder + 'eda_data/'
21 | fea_data_folder = data_folder + 'fea_data/'
22 | #-----------------------------------------------------------------
23 | model_tool = base_model(submit_data_folder)
24 | fea_tool = feature_tool(fea_data_folder)
25 | data_pre=data_prepare()
26 | #-----------------------------------------------------------------
27 | trainDf=pd.read_csv(origin_data_folder+'gsp_train.csv')
28 | testDf=pd.read_csv(origin_data_folder+'gsp_test.csv')
29 |
30 | sparse_fea=['userid','target_item','pos_ts']
31 | seq_fea=['item_seq','gsp_seq']
32 | target_fea=['target']
33 |
34 | df,(train_idx,test_idx)=data_pre.concat_test_train(trainDf,testDf)
35 | seqDf=df[seq_fea]
36 | sparseDf=df[sparse_fea]
37 | targetDf=df[target_fea]
38 |
39 | print(targetDf['target'].value_counts())
40 |
41 | seqDf,seqIdx,seqInfo=data_pre.seq_deal(
42 | seqDf,max_len=[90]*2,embedding_dim=[8]*2,mask_zero=True,is_trainable=True,is_str_list=False,is_str=True,
43 | pre_weight=None,sample_num=5)
44 | sparseDf,sparseInfo=data_pre.sparse_fea_deal(sparseDf)
45 |
46 | train,val=data_pre.extract_train_test(
47 | targetDf=targetDf,test_idx=test_idx,train_idx=train_idx,sparseDf=sparseDf,seqDf=seqDf)
48 |
49 | behaviorFea=['item_seq','gsp_seq']
50 |
51 | model=BST(data_pre.FeatureInput(sparseInfo=sparseInfo,seqInfo=seqInfo),behaviorFea=behaviorFea)
52 | print(model.summary())
53 |
54 | model.compile(loss="mean_squared_error",optimizer='adam',metrics=['accuracy'])
55 | model.fit(train,validation_data=val,epochs=100,callbacks=[tf.keras.callbacks.EarlyStopping(patience=10,verbose=5)])
--------------------------------------------------------------------------------
/example/ctr_example/seq.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | '''=================================
3 | @Author :tix_hjq
4 | @Date :2020/5/22 4:40 PM
5 | @File :seq.py
6 | ================================='''
7 | from kon.model.ctr_model.model.models import *
8 |
9 | warnings.filterwarnings("ignore")
10 | pd.set_option('display.max_columns', None)
11 | pd.set_option('display.max_rows', None)
12 | pd.set_option('max_colwidth', 100)
13 |
14 | print(os.getcwd())
15 | #----------------------------------------------------
16 | data_folder = '../../data/'
17 | origin_data_folder = data_folder + 'origin_data/'
18 | submit_data_folder = data_folder + 'submit_data/'
19 | eda_data_folder = data_folder + 'eda_data/'
20 | fea_data_folder = data_folder + 'fea_data/'
21 | #-----------------------------------------------------------------
22 | model_tool = base_model(submit_data_folder)
23 | fea_tool = feature_tool(fea_data_folder)
24 | data_pre=data_prepare(batch_size=32)
25 | #-----------------------------------------------------------------
26 | trainDf=pd.read_csv(origin_data_folder+'seq_train.csv')
27 | testDf=pd.read_csv(origin_data_folder+'seq_test.csv')
28 |
29 | sparse_fea=['user_id','item_id','item_cate']
30 | seq_fea=['buy_list','cate_list']
31 | # seq_fea=['buy_list']
32 | target_fea=['target']
33 |
34 | df,(train_idx,test_idx)=data_pre.concat_test_train(trainDf,testDf)
35 | seqDf=df[seq_fea]
36 | sparseDf=df[sparse_fea]
37 | targetDf=df[target_fea]
38 |
39 | seqDf,seqIdx,seqInfo=data_pre.seq_deal(
40 | seqDf,max_len=[90]*2,embedding_dim=[8]*2,mask_zero=True,is_trainable=True,
41 | pre_weight=None,sample_num=5)
42 | sparseDf,sparseInfo=data_pre.sparse_fea_deal(sparseDf)
43 |
44 | train,val=data_pre.extract_train_test(
45 | targetDf=targetDf,test_idx=test_idx,train_idx=train_idx,sparseDf=sparseDf,seqDf=seqDf)
46 |
47 | candidateFea=['item_id','item_cate']
48 | behaviorFea=['buy_list','cate_list']
49 |
50 | model=DIEN(data_pre.FeatureInput(sparseInfo=sparseInfo,seqInfo=seqInfo),candidateFea=candidateFea,behaviorFea=behaviorFea)
51 | print(model.summary())
52 | model.compile(loss=tf.losses.binary_crossentropy,optimizer='adam',metrics=[tf.keras.metrics.AUC()])
53 | model.fit(train,validation_data=val,epochs=100,callbacks=[tf.keras.callbacks.EarlyStopping(patience=10,verbose=5)])
--------------------------------------------------------------------------------
/example/ctr_example/session.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # _*_ coding:utf-8 _*_
3 | '''=================================
4 | @Author :tix_hjq
5 | @Date :2020/6/9 11:06 AM
6 | @File :session.py
7 | ================================='''
8 | from numpy.random import random
9 | from kon.model.ctr_model.model.models import *
10 | from kon.utils.data_prepare import data_prepare
11 |
12 | warnings.filterwarnings("ignore")
13 | pd.set_option('display.max_columns', None)
14 | pd.set_option('display.max_rows', None)
15 | pd.set_option('max_colwidth', 100)
16 |
17 | print(os.getcwd())
18 | #----------------------------------------------------
19 | data_folder = '../../data/'
20 | origin_data_folder = data_folder + 'origin_data/'
21 | submit_data_folder = data_folder + 'submit_data/'
22 | eda_data_folder = data_folder + 'eda_data/'
23 | fea_data_folder = data_folder + 'fea_data/'
24 | #-----------------------------------------------------------------
25 | model_tool = base_model(submit_data_folder)
26 | fea_tool = feature_tool(fea_data_folder)
27 | data_pre=data_prepare()
28 | #-----------------------------------------------------------------
29 | np.random.seed(2020)
30 | tf.random.set_seed(2020)
31 |
32 | trainDf=pd.read_csv(origin_data_folder+'session_train.csv')
33 | testDf=pd.read_csv(origin_data_folder+'session_test.csv')
34 |
35 | session_maxLen=10
36 | session_maxNum=20
37 | sparse_fea=['region','prev','vid','cid','class_id']
38 | dense_fea=['title_length']
39 | seq_fea=['click_item_session']
40 | target_fea=['label']
41 |
42 | df,(train_idx,test_idx)=data_pre.concat_test_train(trainDf,testDf)
43 | seqDf=df[seq_fea]
44 | sparseDf=df[sparse_fea]
45 | denseDf=df[dense_fea]
46 | targetDf=df[target_fea]
47 |
48 | seqDf,seqInfo=data_pre.sparse_wrap(seqDf,seqIdx_path=origin_data_folder+'session_seq_idx.pkl',max_len=[session_maxLen*session_maxNum]*1,embedding_dim=[8]*1)
49 | sparseDf,sparseInfo=data_pre.sparse_fea_deal(sparseDf)
50 | denseDf,denseInfo=data_pre.dense_fea_deal(denseDf)
51 |
52 | train,val=data_pre.extract_train_test(
53 | targetDf=targetDf,test_idx=test_idx,train_idx=train_idx,sparseDf=sparseDf,seqDf=seqDf,denseDf=denseDf)
54 |
55 | candidateFea=['vid']
56 | behaviorFea=['click_item_session']
57 |
58 | model=DSIN(data_pre.FeatureInput(sparseInfo=sparseInfo,seqInfo=seqInfo),candidateFea=candidateFea,behaviorFea=behaviorFea)
59 | print(model.summary())
60 | model.compile(loss=tf.losses.binary_crossentropy,optimizer='adam',metrics=[tf.keras.metrics.AUC()])
61 | model.fit(train,validation_data=val,epochs=100,callbacks=[tf.keras.callbacks.EarlyStopping(patience=10,verbose=5)])
--------------------------------------------------------------------------------
/example/ctr_example/session_prepare.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # _*_ coding:utf-8 _*_
3 | '''=================================
4 | @Author :tix_hjq
5 | @Date :2020/5/17 4:27 PM
6 | @File :session_prepare.py
7 | ================================='''
8 | from pandas import DataFrame
9 | import gc
10 | from scipy import stats
11 | from kon.model.ctr_model.model.models import *
12 | from kon.utils.data_prepare import data_prepare
13 |
14 | warnings.filterwarnings("ignore")
15 | pd.set_option('display.max_columns', None)
16 | pd.set_option('display.max_rows', None)
17 | pd.set_option('max_colwidth', 100)
18 |
19 | print(os.getcwd())
20 | #----------------------------------------------------
21 | data_folder = '../../data/'
22 | origin_data_folder = data_folder + 'origin_data/mgtv_data/'
23 | submit_data_folder = data_folder + 'submit_data/'
24 | eda_data_folder = data_folder + 'eda_data/'
25 | fea_data_folder = data_folder + 'fea_data/'
26 | #-----------------------------------------------------------------
27 | model_tool = base_model(submit_data_folder)
28 | fea_tool = feature_tool(fea_data_folder)
29 | data_format=data_prepare()
30 | #-----------------------------------------------------------------
31 | def prepare_raw_data():
32 |     context=pd.read_parquet(origin_data_folder+'context1.parquet')
33 |     item=pd.read_parquet(origin_data_folder+'item.parquet')
34 |     user=pd.read_parquet(origin_data_folder+'user.parquet')
35 |
36 |     user=user.merge(context,how='left',on=['did'])
37 |     user=user.merge(item,how='left',on=['vid'])
38 |
39 |     logs_fea=['click_item','click_time']
40 |     user_fea=['did','region','prev']
41 |     ad_fea=['vid','cid','class_id','title_length']
42 |     target_fea=['label']
43 |
44 |     use_fea=logs_fea+user_fea+ad_fea+target_fea
45 |
46 |     user=user[use_fea]
47 |     user.drop_duplicates(['did'],inplace=True)
48 |     user.to_csv(origin_data_folder+'data.csv',index=None)
49 |
50 |     df=pd.read_csv(origin_data_folder+'part_29/data.csv')
51 |     df=pd.concat([df,pd.read_csv(origin_data_folder+'part_30/data.csv')],axis=0)
52 |     df.to_csv(origin_data_folder+'data.csv',index=None)
53 |
54 |
55 | def generator_session_idx(df, group_cols: list = ['did', 'click_time'], item_cols: str = 'click_item'):
56 |     '''
57 |     :param df:
58 |         format:
59 |             user_id time item
60 |             1       1    1
61 |     :param group_cols:
62 |         format: list ==> [user, time]
63 |         [groupby sign index: user_id, groupby time index: session split time]
64 |     :param item_cols:
65 |         item cols
66 |     :return:
67 |     '''
68 |
69 |     def session_list(x):
70 |         return len(x.tolist())
71 |
72 |     df = df.groupby(group_cols)[item_cols].agg(session_list).reset_index().rename(
73 |         columns={item_cols: '{}_session_idx'.format(item_cols)})
74 |
75 |     def seq_idx(x):
76 |         s_ = 0
77 |         need_list = ['0']
78 |         for i in x.tolist():
79 |             s_ += i
80 |             need_list.append(str(s_))
81 |         return ','.join(need_list)
82 |
83 |     df = df.groupby([group_cols[0]])['{}_session_idx'.format(item_cols)].agg(seq_idx).reset_index()
84 |
85 |     return df
86 |
87 | save_folder=data_folder + 'origin_data/'
88 | def prepare():
89 |     ori_df=pd.read_csv(origin_data_folder+'data.csv')
90 |     ori_df['seq_len']=[len(str(i).split(',')) for i in ori_df['click_item'].tolist()]
91 |     seqDf,seq_idx,seqInfo=data_format.seq_deal(seqDf=ori_df[['click_item']],embedding_dim=[8],is_str=True,is_str_list=False,use_wrap=False)
92 |     ori_df['click_item']=[','.join([str(j) for j in i]) for i in seqDf['click_item']]
93 |     fea_tool.pickle_op(path=save_folder+'session_seq_idx.pkl',is_save=True,file=seq_idx)
94 |
95 |     return ori_df
96 |
97 | import time
98 | def get_time(timeStamp):
99 |     timeArray = time.localtime(int(timeStamp))
100 |     return time.strftime("%Y-%m-%d %H:%M:%S", timeArray)
101 |
102 | def gen_session_seq(session_maxLen,session_maxNum):
103 |     ori_df=prepare()
104 |     df=ori_df
105 |     df.dropna(inplace=True)
106 |     df['click_time']=[','.join([get_time(j) for j in i.split(',')]) for i in df['click_time'].tolist()]
107 |     # 1h gap used as the session split
108 |     time_list=[i.split(',') for i in df['click_time'].tolist()]
109 |     item_list=[i.split(',') for i in df['click_item'].tolist()]
110 |     did_list=[[i]*len(l) for i,l in zip(df['did'].tolist(),item_list)]
111 |
112 |     df=DataFrame()
113 |     t_list = []
114 |     i_list = []
115 |     d_list = []
116 |     for t_,i_,d_ in zip(time_list,item_list,did_list):
117 |         t_list+=t_
118 |         i_list+=i_
119 |         d_list+=d_
120 |     df['click_time']=t_list
121 |     df['click_item']=i_list
122 |     df['did']=d_list
123 |
124 |     df['click_time']=pd.to_datetime(df['click_time'])
125 |     df['click_time']=df['click_time'].dt.day*100+df['click_time'].dt.hour
126 |     df['click_item']=df['click_item'].astype('str')
127 |
128 |     df=data_format.generator_session(df,group_cols=['did','click_time'],item_cols='click_item',session_maxLen=session_maxLen)
129 |     df=data_format.generator_seq(df,group_cols=['did','click_time'],item_cols='click_item',session_maxNum=session_maxNum,session_maxLen=session_maxLen)
130 |
131 |     del ori_df['click_time']
132 |     ori_df=ori_df.merge(df,how='left',on=['did'])
133 |     ori_df.to_csv('../../data/origin_data/data.csv',index=None)
134 |
135 | def get_session_seq(df,item_col,max_session_length=10):
136 |     session_seq=[
137 |         [item_.split(',')[int(s_):int(e_)]
138 |          for s_,e_ in zip(idx_.split(',')[:-1],idx_.split(',')[1:])]
139 |         for item_,idx_ in zip(df[item_col].tolist(),df['{}_session_idx'.format(item_col)].tolist())]
140 |     return [[tf.keras.preprocessing.sequence.pad_sequences(seq,maxlen=max_session_length) for seq in i] for i in session_seq]
141 |
142 |
143 | def check_length():
144 |     df=pd.read_csv('../../data/origin_data/data.csv')
145 |     df['seq_len']=[len(i.split(' ')) for i in df['click_item'].tolist()]
146 |     df['session_len_mod']=[stats.mode([len(j.split(',')) for j in i.split(' ')]) for i in df['click_item'].tolist()]
147 |     df['session_len_mean']=[np.mean([len(j.split(',')) for j in i.split(' ')]) for i in df['click_item'].tolist()]
148 |     df['session_len_median']=[np.median([len(j.split(',')) for j in i.split(' ')]) for i in df['click_item'].tolist()]
149 |
150 |     print(df.session_len_mod.value_counts())
151 |     print(df.session_len_mean.value_counts())
152 |     print(df.session_len_median.value_counts())
153 |
154 | session_maxLen=10
155 | session_maxNum=20
156 | gen_session_seq(session_maxLen,session_maxNum)
157 |
158 | df=pd.read_csv('../../data/origin_data/data.csv')
159 | del df['seq_len'],df['did'],df['click_item']
160 | gc.collect()
161 |
162 | train_df=df.loc[:int(df.shape[0]*0.8)]
163 | test_df=df.loc[int(df.shape[0]*0.8):]
164 |
165 | train_df.to_csv(save_folder+'session_train.csv',index=None)
166 | test_df.to_csv(save_folder+'session_test.csv',index=None)
167 |
168 |
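169 | # Illustration of generator_session_idx (hypothetical data): a user whose clicks
170 | # fall into two sessions of sizes 3 and 2 yields '0,3,5' -- cumulative offsets that
171 | # get_session_seq later uses to slice the flattened click_item string back into
172 | # per-session lists before padding each session to max_session_length.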
--------------------------------------------------------------------------------
/example/ctr_example/sim_seq.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | '''=================================
3 | @Author :tix_hjq
4 | @Date :2020/11/22 10:18 AM
5 | @File :sim_seq.py
6 | @email :hjq1922451756@gmail.com or 1922451756@qq.com
7 | ================================='''
8 | from kon.model.ctr_model.model.models import *
9 |
10 | warnings.filterwarnings("ignore")
11 | pd.set_option('display.max_columns', None)
12 | pd.set_option('display.max_rows', None)
13 | pd.set_option('max_colwidth', 100)
14 |
15 | print(os.getcwd())
16 | #----------------------------------------------------
17 | data_folder = '../../data/'
18 | origin_data_folder = data_folder + 'origin_data/'
19 | submit_data_folder = data_folder + 'submit_data/'
20 | eda_data_folder = data_folder + 'eda_data/'
21 | fea_data_folder = data_folder + 'fea_data/'
22 | #-----------------------------------------------------------------
23 | model_tool = base_model(submit_data_folder)
24 | fea_tool = feature_tool(fea_data_folder)
25 | data_pre=data_prepare(batch_size=32)
26 | #-----------------------------------------------------------------
27 |
28 | trainDf=pd.read_csv(origin_data_folder+'seq_train.csv')
29 | testDf=pd.read_csv(origin_data_folder+'seq_test.csv')
30 |
31 | df,(train_idx,test_idx)=data_pre.concat_test_train(trainDf,testDf)
32 |
33 | reduceSeq,reduceCate=data_pre.hard_search(seqData=fea_tool.batch_convert_list(df["buy_list"]),
34 | seqCate=fea_tool.batch_convert_list(df["cate_list"]),
35 | targetCate=df["item_cate"].tolist())
36 | df["reduce_seq"]=reduceSeq
37 | df["reduce_seq"]=df["reduce_seq"].astype("str")
38 | df["reduce_cate"]=reduceCate
39 | df["reduce_cate"]=df["reduce_cate"].astype("str")
40 |
41 | sparse_fea=['user_id','item_id','item_cate']
42 | reduce_fea=['reduce_seq','reduce_cate']
43 | seq_fea=["buy_list","cate_list"]+reduce_fea
44 | target_fea=['target']
45 |
46 | seqDf=df[seq_fea]
47 | sparseDf=df[sparse_fea]
48 | targetDf=df[target_fea]
49 |
50 | seqDf,seqIdx,seqInfo=data_pre.seq_deal(
51 | seqDf,max_len=[90]*4,embedding_dim=[8]*4,mask_zero=True,is_trainable=True,
52 | pre_weight=None,sample_num=5,use_wrap=True)
53 |
54 | sparseDf,sparseInfo=data_pre.sparse_fea_deal(sparseDf)
55 |
56 | train,val=data_pre.extract_train_test(
57 | targetDf=targetDf,test_idx=test_idx,train_idx=train_idx,sparseDf=sparseDf,seqDf=seqDf)
58 |
59 | model=SIM(data_pre.FeatureInput(sparseInfo=sparseInfo,seqInfo=seqInfo),reduceFea=reduce_fea,candidateFea=["item_id","item_cate"],behaviorFea=seq_fea)
60 | print(model.summary())
61 | model.compile(loss=tf.losses.binary_crossentropy,optimizer='adam',metrics=[tf.keras.metrics.AUC()])
62 | model.fit(train,validation_data=val,epochs=100,callbacks=[tf.keras.callbacks.EarlyStopping(patience=10,verbose=5)])
--------------------------------------------------------------------------------
/example/ctr_example/timeInterval.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | '''=================================
3 | @Author :tix_hjq
4 | @Date :2020/6/24 1:13 PM
5 | @File :timeInterval.py
6 | @email :hjq1922451756@gmail.com or 1922451756@qq.com
7 | ================================='''
8 | from kon.model.ctr_model.model.models import *
9 |
10 | warnings.filterwarnings("ignore")
11 | pd.set_option('display.max_columns', None)
12 | pd.set_option('display.max_rows', None)
13 | pd.set_option('max_colwidth', 100)
14 |
15 | print(os.getcwd())
16 | #----------------------------------------------------
17 | data_folder = '../../data/'
18 | origin_data_folder = data_folder + 'origin_data/'
19 | submit_data_folder = data_folder + 'submit_data/'
20 | eda_data_folder = data_folder + 'eda_data/'
21 | fea_data_folder = data_folder + 'fea_data/'
22 | #-----------------------------------------------------------------
23 | model_tool = base_model(submit_data_folder)
24 | fea_tool = feature_tool(fea_data_folder)
25 | data_pre=data_prepare()
26 | #-----------------------------------------------------------------
27 | trainDf=pd.read_csv(origin_data_folder+'time_inter_train.csv')
28 | testDf=pd.read_csv(origin_data_folder+'time_inter_test.csv')
29 |
30 | sparse_fea=['did','region','vid','cid']
31 | seq_fea=['click_item','click_interval']
32 | target_fea=['label']
33 |
34 | df,(train_idx,test_idx)=data_pre.concat_test_train(trainDf,testDf)
35 | seqDf=df[seq_fea]
36 | sparseDf=df[sparse_fea]
37 | targetDf=df[target_fea]
38 |
39 | seqDf,seqIdx,seqInfo=data_pre.seq_deal(
40 | seqDf=seqDf,embedding_dim=[8,0],max_len=[90]*2,is_str_list=False,
41 | is_str=True,sample_num=5)
42 | sparseDf,sparseInfo=data_pre.sparse_fea_deal(sparseDf)
43 |
44 | train,val=data_pre.extract_train_test(
45 | targetDf=targetDf,test_idx=test_idx,train_idx=train_idx,sparseDf=sparseDf,seqDf=seqDf)
46 |
47 |
48 | userFea=['region']
49 | timestampFea=['click_interval']
50 | behaviorFea=['click_item']
51 | targetFea=['vid']
52 |
53 | model=DTS(data_pre.FeatureInput(sparseInfo=sparseInfo,seqInfo=seqInfo),userFea=userFea,
54 | timestampFea=timestampFea,behaviorFea=behaviorFea,targetFea=targetFea)
55 | print(model.summary())
56 | model.compile(loss=tf.losses.binary_crossentropy,optimizer='adam',metrics=[tf.keras.metrics.AUC()])
57 | model.fit(train,validation_data=val,epochs=100,callbacks=[tf.keras.callbacks.EarlyStopping(patience=10,verbose=5)])
--------------------------------------------------------------------------------
/example/ctr_example/un_seq.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # _*_ coding:utf-8 _*_
3 | '''=================================
4 | @Author :tix_hjq
5 | @Date :2020/5/3 4:59 PM
6 | @File :un_seq.py
7 | ================================='''
8 | from numpy.random import random
9 | import tensorflow as tf
10 | import pandas as pd
11 | import numpy as np
12 | import warnings
13 | import os
14 | from kon.model.ctr_model.model.models import *
15 |
16 | warnings.filterwarnings("ignore")
17 | pd.set_option('display.max_columns', None)
18 | pd.set_option('display.max_rows', None)
19 | pd.set_option('max_colwidth', 100)
20 |
21 | print(os.getcwd())
22 | #----------------------------------------------------
23 | data_folder = '../../data/'
24 | origin_data_folder = data_folder + 'origin_data/'
25 | submit_data_folder = data_folder + 'submit_data/'
26 | eda_data_folder = data_folder + 'eda_data/'
27 | fea_data_folder = data_folder + 'fea_data/'
28 | #-----------------------------------------------------------------
29 | model_tool = base_model(submit_data_folder)
30 | fea_tool = feature_tool(fea_data_folder)
31 | prepare_tool=data_prepare()
32 | #-----------------------------------------------------------------
33 | np.random.seed(2020)
34 | tf.random.set_seed(2020)
35 |
36 | train_df=pd.read_csv(origin_data_folder+'unseq_train.csv',nrows=100).rename(columns={'target':'label'})
37 | test_df=pd.read_csv(origin_data_folder+'unseq_test.csv',nrows=100).rename(columns={'target':'label'})
38 |
39 | sparse_fea=[str(i) for i in range(14,40)]
40 | dense_fea=[str(i) for i in range(1,14)]
41 | target_fea=['label']
42 |
43 | val_index=np.random.choice(train_df.index.tolist(),size=int(train_df.shape[0]*0.3),replace=False)
44 | train_index=[i for i in train_df.index.tolist() if i not in val_index]
45 |
46 | df,(train_idx,test_idx)=prepare_tool.concat_test_train(train_df,test_df)
47 | sparseDf=df[sparse_fea]
48 | denseDf=df[dense_fea]
49 | targetDf=df[target_fea]
50 |
51 | sparseDf,sparseInfo=prepare_tool.sparse_fea_deal(sparseDf)
52 | denseDf,denseInfo=prepare_tool.dense_fea_deal(denseDf)
53 |
54 | train_df,test_df,y_train,y_test=prepare_tool.extract_train_test(train_idx=train_idx,test_idx=test_idx,sparseDf=sparseDf,denseDf=denseDf,targetDf=targetDf,use_softmax=True)
55 | # train_df,test_df,y_train,y_test=prepare_tool.extract_train_test(train_idx=train_idx,test_idx=test_idx,sparseDf=sparseDf,targetDf=targetDf)
56 | train,val=prepare_tool.split_val_set(train_df,y_train,train_index,val_index)
57 | #----------------------------train model--------------------------------------
58 |
59 | model=FM(prepare_tool.FeatureInput(sparseInfo=sparseInfo,denseInfo=denseInfo,useAddLinear=False,useLinear=True,useFlattenLinear=False))
60 | print(model.summary())
61 | model.compile(loss=tf.losses.binary_crossentropy,optimizer='adam',metrics=[tf.keras.metrics.AUC()])
62 | model.fit(train,validation_data=val,epochs=100,callbacks=[tf.keras.callbacks.EarlyStopping(patience=10,verbose=5)])
--------------------------------------------------------------------------------
/kon/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # _*_ coding:utf-8 _*_
3 | '''=================================
4 | @Author :tix_hjq
5 | @Date :2020/7/21 9:00 AM
6 | @File :__init__.py
7 | @email :hjq1922451756@gmail.com or 1922451756@qq.com
8 | ================================='''
--------------------------------------------------------------------------------
/kon/model/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # _*_ coding:utf-8 _*_
3 | '''=================================
4 | @Author :tix_hjq
5 | @Date :2020/5/29 4:01 PM
6 | @File :__init__.py
7 | ================================='''
8 |
--------------------------------------------------------------------------------
/kon/model/ctr_model/README.md:
--------------------------------------------------------------------------------
1 | 
2 |
3 |
4 | CTR MODEL ACHIEVE:
5 | >[1]Interactive Model
6 | >>1.[[FM] Fast Context-aware Recommendations with Factorization Machines (UKON 2011).pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/interactive/%5BFM%5D%20Fast%20Context-aware%20Recommendations%20with%20Factorization%20Machines%20(UKON%202011).pdf)
7 | >>2.[[PNN] Product-based Neural Networks for User Response Prediction (SJTU 2016).pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/interactive/%5BPNN%5D%20Product-based%20Neural%20Networks%20for%20User%20Response%20Prediction%20(SJTU%202016).pdf)
8 | >>3.[[Deep Crossing] Deep Crossing - Web-Scale Modeling without Manually Crafted Combinatorial Features (Microsoft 2016).pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/interactive/%5BDeep%20Crossing%5D%20Deep%20Crossing%20-%20Web-Scale%20Modeling%20without%20Manually%20Crafted%20Combinatorial%20Features%20(Microsoft%202016).pdf)
9 | >>4.[[Wide & Deep] Wide & Deep Learning for Recommender Systems (Google 2016).pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/interactive/%5BWide%20%26%20Deep%5D%20Wide%20%26%20Deep%20Learning%20for%20Recommender%20Systems%20(Google%202016).pdf)
10 | >>5.[[DeepFM] A Factorization-Machine based Neural Network for CTR Prediction (HIT-Huawei 2017).pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/interactive/%5BDeepFM%5D%20A%20Factorization-Machine%20based%20Neural%20Network%20for%20CTR%20Prediction%20(HIT-Huawei%202017).pdf)
11 | >>6.[[DCN] Deep & Cross Network for Ad Click Predictions (Stanford 2017).pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/interactive/%5BDCN%5D%20Deep%20%26%20Cross%20Network%20for%20Ad%20Click%20Predictions%20(Stanford%202017).pdf)
12 | >>7.[[NFM] Neural Factorization Machines for Sparse Predictive Analytics (NUS 2017).pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/interactive/%5BNFM%5D%20Neural%20Factorization%20Machines%20for%20Sparse%20Predictive%20Analytics%20(NUS%202017).pdf)
13 | >>8.[[xDeepFM] xDeepFM - Combining Explicit and Implicit Feature Interactions for Recommender Systems (USTC 2018).pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/interactive/%5BxDeepFM%5D%20xDeepFM%20-%20Combining%20Explicit%20and%20Implicit%20Feature%20Interactions%20for%20Recommender%20Systems%20(USTC%202018).pdf)
14 | >>9.[[AFM] Attentional Factorization Machines - Learning the Weight of Feature Interactions via Attention Networks (ZJU 2017).pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/interactive/%5BAFM%5D%20Attentional%20Factorization%20Machines%20-%20Learning%20the%20Weight%20of%20Feature%20Interactions%20via%20Attention%20Networks%20(ZJU%202017).pdf)
15 | >>10.[[AutoInt] AutoInt Automatic Feature Interaction Learning via Self-Attentive Neural Networks(CIKM 2019).pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/interactive/%5BAutoInt%5D%20AutoInt%20Automatic%20Feature%20Interaction%20Learning%20via%20Self-Attentive%20Neural%20Networks(CIKM%202019).pdf)
16 | >>...Later Building...
17 |
18 | >[2]Behavior Model
19 | >>1.[[DIN] Deep Interest Network for Click-Through Rate Prediction (Alibaba 2018).pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/behavior/%5BDIN%5D%20Deep%20Interest%20Network%20for%20Click-Through%20Rate%20Prediction%20(Alibaba%202018).pdf)
20 | >>2.[[DIEN] Deep Interest Evolution Network for Click-Through Rate Prediction (Alibaba 2019).pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/behavior/%5BDIEN%5D%20Deep%20Interest%20Evolution%20Network%20for%20Click-Through%20Rate%20Prediction%20(Alibaba%202019).pdf)
21 | >>3.[[DSIN]Deep Session Interest Network for Click-Through Rate Predicti(Alibaba 2019).pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/behavior/%5BDSIN%5DDeep%20Session%20Interest%20Network%20for%20Click-Through%20Rate%20Predicti%5B2019%5D.pdf)
22 | >>4.[[SeqFM]Sequence-Aware Factorization Machines(2019).pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/behavior/%5BSeqFM%5DSequence-Aware%20Factorization%20Machines(2019).pdf)
23 | >>5.[[DTS]Deep Time-Stream Framework for Click-Through Rate Prediction by Tracking Interest Evolution[2020].pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/behavior/%5BDTSF%5DDeep%20Time-Stream%20Framework%20for%20Click-Through%20Rate%20Prediction%20by%20Tracking%20Interest%20Evolution%5B2020%5D.pdf)
24 | >>6.[[BST]Behavior Sequence Transformer for E-commerce Recommendation in Alibaba[2019].pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/behavior/%5BBST%5DBehavior%20Sequence%20Transformer%20for%20E-commerce%20Recommendation%20in%20Alibaba%5B2019%5D.pdf)
25 | >>7.[[MIMN]Practice on Long Sequential User Behavior Modeling for Click-Through Rate Prediction[2019].pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/behavior/%5BMIMN%5DPractice%20on%20Long%20Sequential%20User%20Behavior%20Modeling%20for%20Click-Through%20Rate%20Prediction%5B2019%5D.pdf)
26 | >>...Later Building...
27 |
28 | >[3]Next Building
29 | >>1.[reading][[LSM]Lifelong Sequential Modeling with Personalized Memorization for User Response Prediction[2019].pdf]()
30 | >>2.[building][[DSTN]Deep Spatio-Temporal Neural Networks for Click-Through Rate Prediction[2019].pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/behavior/%5BDSTN%5DDeep%20Spatio-Temporal%20Neural%20Networks%20for%20Click-Through%20Rate%20Prediction%5B2019%5D.pdf)
31 | >>3.[reading][[FiBiNET]Combining Feature Importance and Bilinear featureInteraction for Click-Through Rate Predict[2019].pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/Next%20Read/%5BFiBiNET%5DCombining%20Feature%20Importance%20and%20Bilinear%20featureInteraction%20for%20Click-Through%20Rate%20Predict%5B2019%5D.pdf)
32 | >>4.[building][[SIM]Search-based User Interest Modeling with Lifelong Sequential Behavior Data for Click-Through Rate Prediction[2020].pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/behavior/%5BSIM%5DSearch-based%20User%20Interest%20Modeling%20with%20Lifelong%20Sequential%20Behavior%20Data%20for%20Click-Through%20Rate%20Prediction%5B2020%5D.pdf)
33 | >>......
34 |
35 | p.s.
36 | 1.For DIEN, the attention-controlled update gate (AUGRU) from the paper is not actually implemented:
37 | of the Keras RNN cells I only adapted the standard LSTM, and the CuDNN LSTM is troublesome to modify,
38 | so the attention weight is simply multiplied into the hidden state (weight*hidden_state), as sketched below.
39 | 2.[building] The faster mult-attention [1] will become the default later on, and a hash option will be provided for the product-related parts.
40 |
41 | p.s. These are not faithful reproductions. I have no machine at home right now, so the logic should be largely sound but is only lightly checked: I used partially sampled data and verified that the models run end to end. Questions and discussion are welcome.
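
A minimal sketch of the shortcut described in note 1 (illustrative names only; assumes an attention score of shape `(batch, 1)` and a hidden state of shape `(batch, units)`):

```python
import tensorflow as tf

def attention_weighted_state(att_score: tf.Tensor, hidden_state: tf.Tensor) -> tf.Tensor:
    """Substitute used here: scale the hidden state by the attention score
    instead of gating the GRU update as AUGRU does in the DIEN paper."""
    return att_score * hidden_state  # broadcasts (batch, 1) * (batch, units)
```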
42 |
43 |
44 | [[1][REFORMER] THE EFFICIENT TRANSFORMER[2020].pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/behavior/%5BREFORMER%5D%20THE%20EFFICIENT%20TRANSFORMER%5B2020%5D.pdf)
--------------------------------------------------------------------------------
/kon/model/ctr_model/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # _*_ coding:utf-8 _*_
3 | '''=================================
4 | @Author :tix_hjq
5 | @Date :2020/5/29 下午4:01
6 | @File :__init__.py.py
7 | ================================='''
8 | import pandas as pd
9 | import warnings
10 | import os
11 | from kon.model.feature_eng.feature_transform import feature_tool
12 | from kon.model.feature_eng.base_model import base_model
13 |
14 | warnings.filterwarnings("ignore")
15 | pd.set_option('display.max_columns', None)
16 | pd.set_option('display.max_rows', None)
17 | pd.set_option('max_colwidth', 100)
18 |
19 | print(os.getcwd())
20 | #----------------------------------------------------
21 | data_folder = '../../data/'
22 | origin_data_folder = data_folder + 'origin_data/'
23 | submit_data_folder = data_folder + 'submit_data/'
24 | eda_data_folder = data_folder + 'eda_data/'
25 | fea_data_folder = data_folder + 'fea_data/'
26 | #-----------------------------------------------------------------
27 | model_tool = base_model(submit_data_folder)
28 | fea_tool = feature_tool(fea_data_folder)
29 | #-----------------------------------------------------------------
--------------------------------------------------------------------------------
/kon/model/ctr_model/layer/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # _*_ coding:utf-8 _*_
3 | '''=================================
4 | @Author :tix_hjq
5 | @Date :2020/5/29 下午4:01
6 | @File :__init__.py.py
7 | ================================='''
--------------------------------------------------------------------------------
/kon/model/ctr_model/layer/behavior_layer/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # _*_ coding:utf-8 _*_
3 | '''=================================
4 | @Author :tix_hjq
5 | @Date :2020/6/15 上午11:38
6 | @File :__init__.py.py
7 | ================================='''
8 | import pandas as pd
9 | import warnings
10 | import os
11 | from kon.model.feature_eng.feature_transform import feature_tool
12 | from kon.model.feature_eng.base_model import base_model
13 |
14 | warnings.filterwarnings("ignore")
15 | pd.set_option('display.max_columns', None)
16 | pd.set_option('display.max_rows', None)
17 | pd.set_option('max_colwidth', 100)
18 |
19 | print(os.getcwd())
20 | #----------------------------------------------------
21 | data_folder = '../../data/'
22 | origin_data_folder = data_folder + 'origin_data/'
23 | submit_data_folder = data_folder + 'submit_data/'
24 | eda_data_folder = data_folder + 'eda_data/'
25 | fea_data_folder = data_folder + 'fea_data/'
26 | #-----------------------------------------------------------------
27 | model_tool = base_model(submit_data_folder)
28 | fea_tool = feature_tool(fea_data_folder)
29 | #-----------------------------------------------------------------
--------------------------------------------------------------------------------
/kon/model/ctr_model/layer/core_layer/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # _*_ coding:utf-8 _*_
3 | '''=================================
4 | @Author :tix_hjq
5 | @Date :2020/6/15 上午11:40
6 | @File :__init__.py.py
7 | ================================='''
8 | import pandas as pd
9 | import warnings
10 | import os
11 | from kon.model.feature_eng.feature_transform import feature_tool
12 | from kon.model.feature_eng.base_model import base_model
13 |
14 | warnings.filterwarnings("ignore")
15 | pd.set_option('display.max_columns', None)
16 | pd.set_option('display.max_rows', None)
17 | pd.set_option('max_colwidth', 100)
18 |
19 | print(os.getcwd())
20 | #----------------------------------------------------
21 | data_folder = '../../data/'
22 | origin_data_folder = data_folder + 'origin_data/'
23 | submit_data_folder = data_folder + 'submit_data/'
24 | eda_data_folder = data_folder + 'eda_data/'
25 | fea_data_folder = data_folder + 'fea_data/'
26 | #-----------------------------------------------------------------
27 | model_tool = base_model(submit_data_folder)
28 | fea_tool = feature_tool(fea_data_folder)
29 | #-----------------------------------------------------------------
--------------------------------------------------------------------------------
/kon/model/ctr_model/layer/core_layer/core_layer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # _*_ coding:utf-8 _*_
3 | '''=================================
4 | @Author :tix_hjq
5 | @Date :2020/5/3 上午11:41
6 | @File :core_layer.py
7 | ================================='''
8 | import tensorflow as tf
9 | from tensorflow.keras.initializers import glorot_uniform
10 | import pandas as pd
11 | import warnings
12 | import os
13 | from kon.model.feature_eng.feature_transform import feature_tool
14 | from kon.model.feature_eng.base_model import base_model
15 |
16 | warnings.filterwarnings("ignore")
17 | pd.set_option('display.max_columns', None)
18 | pd.set_option('display.max_rows', None)
19 | pd.set_option('max_colwidth', 100)
20 |
21 | print(os.getcwd())
22 | #----------------------------------------------------
23 | data_folder='../../data/'
24 | origin_data_folder=data_folder+'origin_data/'
25 | submit_data_folder=data_folder+'submit_data/'
26 | eda_data_folder=data_folder+'eda_data/'
27 | fea_data_folder=data_folder+'fea_data/'
28 | #-----------------------------------------------------------------
29 | model_tool=base_model(submit_data_folder)
30 | fea_tool=feature_tool(fea_data_folder)
31 | #-----------------------------------------------------------------
32 | class StackLayer(tf.keras.layers.Layer):
33 | '''
34 | support:
35 | concat(flatten)
36 | '''
37 | def __init__(self,use_flat=True,axis=None):
38 | super(StackLayer, self).__init__()
39 | if axis:
40 | self.concat = tf.keras.layers.Concatenate(axis=axis)
41 | else:
42 | self.concat = tf.keras.layers.Concatenate()
43 | self.use_flat=use_flat
44 |
45 | def build(self, input_shape):
46 | super(StackLayer, self).build(input_shape)
47 | self.flat = [tf.keras.layers.Flatten(name='stack_flatten_{}'.format(str(i))) for i in range(len(input_shape))]
48 |
49 | def call(self, inputs, **kwargs):
50 | if self.use_flat:
51 | inputs=[flat_(input_) for input_,flat_ in zip(inputs,self.flat)]
52 | if len(inputs)==1:
53 | return inputs[0]
54 | else:
55 | return self.concat(inputs)
56 |
57 |
58 | class ScoreLayer(tf.keras.layers.Layer):
59 | def __init__(self,use_add=False,use_inner=False,use_global=False,seed=2020):
60 | from kon.model.ctr_model.layer.interactive_layer.interactive_layer import InnerLayer
61 | super(ScoreLayer, self).__init__()
62 | self.use_add=use_add
63 | self.add=tf.keras.layers.Add()
64 | self.activate=tf.keras.layers.Activation('sigmoid')
65 | self.use_inner=use_inner
66 | self.inner=InnerLayer(use_inner=True)
67 | self.use_global=use_global
68 | self.seed=seed
69 |
70 | def build(self, input_shape):
71 | super(ScoreLayer, self).build(input_shape)
72 | if self.use_global:
73 | self.global_bias=self.add_weight(shape=(1,),initializer=glorot_uniform(self.seed))
74 |
75 | def call(self, inputs, **kwargs):
76 | if self.use_add:
77 | inputs=self.add(inputs)
78 | if self.use_global:
79 | inputs=self.add([inputs,self.global_bias])
80 | if self.use_inner:
81 | inputs=self.inner(inputs)
82 |
83 | output=self.activate(inputs)
84 | return output
85 |
86 | class MergeScoreLayer(tf.keras.layers.Layer):
87 | def __init__(self,use_merge:bool=True,output_dim=2):
88 | super(MergeScoreLayer, self).__init__()
89 | self.concat=StackLayer()
90 | self.dense=tf.keras.layers.Dense(units=output_dim,activation='softmax')
91 | self.use_merge=use_merge
92 |
93 | def build(self, input_shape):
94 | super(MergeScoreLayer, self).build(input_shape)
95 |
96 | def call(self, inputs, **kwargs):
97 | if self.use_merge:
98 | inputs=self.concat(inputs)
99 | x=self.dense(inputs)
100 | return x
101 |
102 | class HiddenLayer(tf.keras.layers.Layer):
103 | '''
104 | notice:
105 | the dense here can be replaced, letting other methods do the computation,
106 | e.g. multi-head attention can be swapped in to build AutoInt
107 | Dnn core:
108 | hidden-layer building block
109 | in the future, this may be dropped
110 | '''
111 | def __init__(self,hidden_units:int,use_bn:bool=True,seed=2020,l2_reg=0,other_dense=None):
112 | super(HiddenLayer, self).__init__()
113 | self.dense=tf.keras.layers.Dense(
114 | units=hidden_units,kernel_initializer=glorot_uniform(seed=seed),
115 | bias_initializer=glorot_uniform(seed=seed),kernel_regularizer=tf.keras.regularizers.l2(l2_reg)
116 | )
117 | if other_dense:
118 | self.dense=other_dense
119 | self.bn=tf.keras.layers.BatchNormalization()
120 | self.use_bn=use_bn
121 |
122 | def build(self, input_shape):
123 | super(HiddenLayer, self).build(input_shape)
124 |
125 | def call(self, inputs, **kwargs):
126 | x=self.dense(inputs)
127 | if self.use_bn:
128 | x=self.bn(x)
129 | return x,inputs
130 |
131 | class ResActivateLayer(tf.keras.layers.Layer):
132 | '''
133 | notice:
134 | res layer activate,support ln,bn...
135 | '''
136 | def __init__(self,use_bn,use_ln,hidden_activate):
137 | super(ResActivateLayer, self).__init__()
138 | self.use_ln = use_ln
139 | self.use_bn = use_bn
140 | self.ln = tf.keras.layers.LayerNormalization()
141 | self.bn = tf.keras.layers.BatchNormalization()
142 | self.active = hidden_activate
143 |
144 |
145 | def build(self, input_shape):
146 | super(ResActivateLayer, self).build(input_shape)
147 |
148 | def call(self, inputs, **kwargs):
149 | if self.use_bn:
150 | inputs = self.bn(inputs)
151 | if self.use_ln:
152 | inputs = self.ln(inputs)
153 |
154 | x = self.active(inputs)
155 |
156 | return x
157 |
158 |
159 | class DnnLayer(tf.keras.layers.Layer):
160 | def __init__(self,hidden_units:list=None,l2_reg=0,hidden_activate=tf.keras.layers.ReLU(),use_bn:bool=False,res_unit=1,
161 | output_dim=-1,seed=2020,other_dense=None,use_ln:bool=False,use_flatten=False,**kwargs):
162 | '''
163 | notice:
164 | the dense layers of the dnn can be replaced by other layers,
165 | e.g. multi-head attention (AutoInt);
166 | pass the replacements via other_dense.
167 |
168 | :param hidden_units:list of hidden unit sizes, always required;
169 | when using other_dense you still need to pass it,
170 | e.g. 3 hidden layers with other dense ==> [[],[],[]]
171 | (the contents are not important, the length/shape is)
172 |
173 | :param res_unit:number of layers skipped between residual additions
174 |
175 | :param hidden_activate:activation applied after each hidden layer
176 | Dnn core:
177 | supports auto bn
178 | '''
179 | super(DnnLayer, self).__init__(**kwargs)
180 | self.hidden_list=other_dense
181 | if not other_dense:
182 | self.hidden_list=[HiddenLayer(hidden_units=dim,use_bn=False,other_dense=other_dense)for dim in hidden_units]
183 | self.activate=[ResActivateLayer(use_bn=use_bn,use_ln=use_ln,hidden_activate=hidden_activate) for idx_ in range(len(self.hidden_list))]
185 | self.add=tf.keras.layers.Add()
186 | self.seed=seed
187 | self.output_dim=output_dim
188 | self.res_unit=res_unit
189 | if output_dim!=-1:
190 | self.logit_layer=tf.keras.layers.Dense(
191 | units=output_dim,kernel_initializer=glorot_uniform(seed=seed),
192 | bias_initializer=glorot_uniform(seed=seed)
193 | )
194 | if use_flatten:
195 | self.flat=tf.keras.layers.Flatten()
196 | self.use_flatten=use_flatten
197 |
198 | def build(self, input_shape):
199 | super(DnnLayer, self).build(input_shape)
200 |
201 | def call(self, inputs,**kwargs):
202 | x=inputs
203 | res=[[],[]]
204 | for idx_,hidden_layer in enumerate(self.hidden_list):
205 | [x,ori]=hidden_layer(x)
206 | if idx_==0:
207 | res=[ori,x]
208 | if (idx_+1)%self.res_unit!=0 or self.res_unit==1:
209 | res[-1]=x
210 | if (idx_+1)%self.res_unit==0:
211 | try:
212 | x=self.add(res)
213 | except ValueError:
214 | x=res[-1]
215 |
216 | x=self.activate[idx_](x)
217 | if (idx_+1)%self.res_unit==0:
218 | res[0]=x
219 |
220 | if self.use_flatten:
221 | x = self.flat(x)
222 |
223 | if self.output_dim!=-1:
224 | x=self.logit_layer(x)
225 |
226 | return x
227 |
228 | class IntraViewPoolingLayer(tf.keras.layers.Layer):
229 | def __init__(self):
230 | super(IntraViewPoolingLayer, self).__init__()
231 |
232 | def build(self, input_shape):
233 | super(IntraViewPoolingLayer, self).build(input_shape)
234 |
235 | def call(self, inputs, **kwargs):
236 | output=tf.expand_dims(tf.reduce_mean(inputs,axis=1),axis=1)
237 |
238 | return output
239 |
240 | class AlignLayer(tf.keras.layers.Layer):
241 | '''
242 | aligns dims: if the inputs [a,b,...] have unequal last dims,
243 | projects each of them up to the largest dim
244 | '''
245 | def __init__(self):
246 | super(AlignLayer, self).__init__()
247 |
248 | def build(self, input_shape):
249 | super(AlignLayer, self).build(input_shape)
250 | dim_list=[i[-1] for i in input_shape]
251 | max_dim=max(dim_list)
252 | self.format_dense=[tf.keras.layers.Dense(
253 | units=max_dim) if i...
--------------------------------------------------------------------------------
/kon/model/ctr_model/layer/interactive_layer/interactive_layer.py:
--------------------------------------------------------------------------------
114 | ...embedding info drop
115 | op:sum(embedding_list),but what does it represent?
116 | '''
117 | super(OPnnLayer, self).__init__()
118 | self.seed=seed
119 | self.use_reduce=use_reduce
120 | self.outer=InnerLayer(use_inner=False,perm=[0,2,1],mod=(1,2))
121 | self.add=tf.keras.layers.Add()
122 | self.use_flatten=use_flatten
123 |
124 | def build(self, input_shape):
125 | super(OPnnLayer, self).build(input_shape)
126 | fea_size=len(input_shape)
127 | if self.use_reduce:
128 | fea_size=1
129 | self.flat=[tf.keras.layers.Flatten() for i in range(fea_size)]
130 |
131 | def call(self, inputs, **kwargs):
132 | if self.use_reduce:
133 | sum_inputs=self.add(inputs)
134 | # sum_inputs=tf.expand_dims(sum_inputs,axis=-1)
135 | outer_list=self.outer([sum_inputs,sum_inputs])
136 | else:
137 | # inputs=[tf.expand_dims(input_,axis=-1) for input_ in inputs]
138 | outer_list = self.outer(inputs)
139 |
140 | if self.use_flatten:
141 | outer_list=[flat_(outer_) for outer_,flat_ in zip(outer_list,self.flat)]
142 |
143 | return outer_list
144 |
145 | class FmLayer(tf.keras.layers.Layer):
146 | def __init__(self,use_inner:bool=True,mod=1,use_add=True,**kwargs):
147 | '''
148 | :param mod:
149 | 0.output the raw interaction matrix
150 | 1.output the reduced matrix result
151 | '''
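# For reference (an assumption, since InnerLayer's listing is truncated above):
# the cross part is expected to compute the standard FM second-order term
#   0.5 * ((sum_i v_i * x_i)^2 - sum_i (v_i * x_i)^2)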
152 | super(FmLayer, self).__init__(**kwargs)
153 | self.cross=InnerLayer(use_inner=use_inner,mod=mod,use_add=use_add)
154 | self.add = tf.keras.layers.Add()
155 | self.use_add=use_add
156 |
157 | def build(self,input_shape):
158 | super(FmLayer, self).build(input_shape)
159 | self.cross.build(input_shape)
160 |
161 | def call(self, inputs, **kwargs):
162 | '''
163 | :param inputs:[cross_embed,linear_embed]
164 | '''
165 | cross = self.cross(inputs[0])
166 | if self.use_add:
167 | return self.add([cross]+inputs[1])
168 | else:
169 | return cross+inputs[1]
171 |
172 | class LinearLayer(tf.keras.layers.Layer):
173 | def __init__(self,initializer:str='random_normal'):
174 | super(LinearLayer,self).__init__()
175 | self.initalizer=initializer
176 |
177 | def build(self, input_shape):
178 | super(LinearLayer, self).build(input_shape)
179 | self.w = self.add_weight(shape=(input_shape[-1],1),
180 | initializer=self.initalizer,
181 | trainable=True)
182 | self.b = self.add_weight(shape=(1,),
183 | initializer=self.initalizer,
184 | trainable=True)
185 |
186 | def call(self,inputs,**kwargs):
187 | return [tf.tensordot(a=input_,b=self.w,axes=1)+self.b for input_ in inputs]
188 |
189 | class SparseEmbed(tf.keras.layers.Layer):
190 | '''
191 | embedding core:
192 | supports sparse embed & linear
193 | supports:
194 | flatten,add
195 | '''
196 | def __init__(self,sparse_info:list,is_linear=False,use_flatten=True,use_add=False,seed=2020,support_masking=True,mask_zero=False):
197 | super(SparseEmbed,self).__init__()
198 | self.sparse_info=sparse_info
199 | self.flatten=None
200 | self.supports_masking=support_masking
201 | self.is_linear = is_linear
202 | self.mask_zero=mask_zero
203 | self.use_add = use_add
204 | self.seed=seed
205 |
206 | if use_flatten:
207 | self.flatten=[tf.keras.layers.Flatten()for i in sparse_info]
208 | if use_add:
209 | self.add=tf.keras.layers.Add()
210 |
211 | def build(self, input_shape):
212 | if not self.is_linear:
213 | self.embed=[tf.keras.layers.Embedding(
214 | name=info_.fea_name,input_dim=info_.word_size,output_dim=info_.cross_unit,
215 | mask_zero=info_.mask_zero,embeddings_initializer=glorot_uniform(seed=self.seed),
216 | input_length=info_.input_length,trainable=info_.is_trainable,weights=info_.pre_weight,
217 | embeddings_regularizer=tf.keras.regularizers.l2(info_.emb_reg)
218 | ) if info_.cross_unit!=0 else [] for info_ in self.sparse_info]
219 | else:
220 | self.embed=[tf.keras.layers.Embedding(
221 | name=info_.fea_name,input_dim=info_.word_size,output_dim=info_.linear_unit
222 | )for info_ in self.sparse_info]
223 | super(SparseEmbed, self).build(input_shape)
224 |
225 | def call(self,inputs,**kwargs):
226 |
227 | embed_list = [emb_(input_) if info_.cross_unit != 0 else input_ for emb_, input_, info_ in
228 | zip(self.embed ,inputs, self.sparse_info)]
229 |
230 | if self.flatten:
231 | embed_list=[flat_(embed_) for flat_,embed_ in zip(self.flatten,embed_list)]
232 |
233 | if self.use_add:
234 | embed_list=self.add(embed_list)
235 |
236 | self.embed_list=embed_list
237 |
238 | if self.mask_zero:
239 | return embed_list,\
240 | [emb._keras_mask if info_.cross_unit!=0 else [] for emb,info_ in zip(embed_list,self.sparse_info)]
241 | else:
242 | return embed_list
243 |
244 | def compute_mask(self, inputs, mask=None):
245 | if not self.mask_zero:
246 | return None
247 | return [embed._keras_mask for embed in self.embed_list]
248 |
249 |
250 | class CrossLayer(tf.keras.layers.Layer):
251 | '''
252 | DCN core:
253 | x_k = x0 * (x_{k-1}^T w_{k-1}) + b_{k-1} + x_{k-1}  (recursive format)
254 | '''
255 | def __init__(self,cross_hidden=3,seed=2020,**kwargs):
256 | super(CrossLayer, self).__init__(**kwargs)
257 | self.outer=InnerLayer(use_inner=False,mod=(-2,-1),perm=([0,2,1]))
258 | self.cross_hidden=cross_hidden
259 | self.seed=seed
260 | self.dot_=[tf.keras.layers.Dot(axes=1,name='Dot_{}'.format(str(i))) for i in range(cross_hidden)]
261 | self.add_=[tf.keras.layers.Add(name='Add_{}'.format(str(i)))for i in range(cross_hidden)]
262 |
263 |
264 | def build(self, input_shape):
265 | self.kernel=[
266 | self.add_weight(name='outer_weight_{}'.format(str(i)),
267 | shape=[input_shape[-1],1],initializer=glorot_uniform(seed=self.seed)
268 | )for i in range(self.cross_hidden)]
269 | self.bias=[
270 | self.add_weight(name='outer_bias_{}'.format(str(i)),
271 | shape=[input_shape[-1],1],initializer=tf.keras.initializers.zeros()
272 | )for i in range(self.cross_hidden)]
273 | super(CrossLayer, self).build(input_shape)
274 |
275 | def call(self, inputs, **kwargs):
276 | inputs=tf.expand_dims(inputs,axis=-1)  # (batch, dim) -> (batch, dim, 1): this is x0
277 | pre_inputs=inputs
278 | for i in range(self.cross_hidden):  # x_k = x0*(x_{k-1}^T w_k) + x_{k-1} + b_k
279 | pre_inputs=tf.keras.backend.batch_dot(inputs,tf.keras.backend.dot(
280 | tf.transpose(pre_inputs,perm=[0,2,1]),self.kernel[i]))+pre_inputs+self.bias[i]
281 |
282 | return pre_inputs
283 |
284 |
285 | class CIN(tf.keras.layers.Layer):
286 | '''
287 | XDeepFM core (CIN):
288 | X^k_{h,*} = sum_{i,j} W^{k,h}_{ij} * (X^{k-1}_{i,*} o X^0_{j,*}), o = Hadamard product
289 | x^1 -> ... -> x^k, computed recursively like an RNN
290 | == (h_{k-1}*m, d) feature map ---> Conv1D ---> (h_k, d) ==> x^k
291 | final_output = concat(sum pooling of every feature map)
292 |
293 | feature map:
294 | rows = D, cols = m*h
295 | '''
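# Informal shape walk-through of call() below, assuming inputs of shape (B, m, D):
#   x0:  D tensors of shape (B, m, 1), from tf.split along the embedding axis
#   z:   per-step outer products, transposed/reshaped into the (B, D, m*h_{k-1})
#        feature map described above, then Conv1D(h_k, 1) maps it to (B, D, h_k)
#   each step's map is sum-pooled over its last axis and the results concatenated.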
296 | def __init__(self, conv_size=None, output_dim=1):
297 | super(CIN, self).__init__()
298 | if conv_size is None:
299 | conv_size = [200, 200, 200]
300 | self.conv_size=conv_size
301 | self.concat=tf.keras.layers.Concatenate()
302 | self.output_dim=output_dim
303 | if output_dim==1:
304 | self.logit_layer=tf.keras.layers.Dense(1)
305 |
306 | def build(self, input_shape):
307 | super(CIN, self).build(input_shape)
308 | self.hidden_conv=[tf.keras.layers.Conv1D(size,1) for size in self.conv_size]
309 |
310 | def call(self, inputs, **kwargs):
311 | x0 = tf.split(inputs, [1] * inputs.shape[-1], -1)
312 | pre_=x0
313 | sum_pooling_list=[]
314 |
315 | for conv_ in self.hidden_conv:
316 | z = tf.matmul(x0, pre_, transpose_b=True)
317 | z = tf.transpose(z, perm=[1, 0, 3, 2])
318 | z=tf.reshape(z,[-1,z.shape[1],z.shape[2]*z.shape[3]])
319 | z=conv_(z)
320 | pre_ = tf.transpose(z,[0,2,1])
321 | pre_=tf.split(pre_, [1] * pre_.shape[-1], -1)
322 | sum_pooling_list.append(tf.reduce_sum(z, axis=-1))
323 | output=self.concat(sum_pooling_list)
324 | if self.output_dim==1:
325 | output=self.logit_layer(output)
326 |
327 | return output
328 |
329 | class AttentionBaseLayer(tf.keras.layers.Layer):
330 | '''
331 | AFM core:
332 | basic attention over feature interactions
333 | advice: go directly to DIN instead
334 | '''
335 | def __init__(self,attention_dim=4,seed=2020,output_dim=1):
336 | super(AttentionBaseLayer, self).__init__()
337 | self.add=tf.keras.layers.Add()
338 | self.atten_dim=attention_dim
339 | self.seed=seed
340 | self.single_mlp = tf.keras.layers.Dense(1, 'relu', use_bias=False, kernel_initializer=glorot_uniform(self.seed))
341 | self.single_softmax=tf.keras.layers.Activation('softmax')
342 | self.output_layer=tf.keras.layers.Dense(output_dim)
343 |
344 | def build(self, input_shape):
345 | super(AttentionBaseLayer, self).build(input_shape)
346 |
347 | self.kernel_w=self.add_weight(
348 | name='single_score_w',
349 | shape=(input_shape[0][-1],self.atten_dim),
350 | initializer=glorot_uniform(seed=self.seed)
351 | )
352 | self.kernel_b=self.add_weight(
353 | name='single_score_b',
354 | shape=(self.atten_dim,),
355 | initializer=glorot_uniform(seed=self.seed)
356 | )
357 |
358 |
359 | def call(self, inputs, **kwargs):
360 | inputs=tf.concat(inputs,axis=1)
361 | score_=self.single_mlp(tf.add(tf.keras.backend.dot(inputs,self.kernel_w),self.kernel_b))
362 | score_w=self.single_softmax(score_)
363 | atten_inputs=tf.reduce_sum(score_w*inputs,axis=1)
364 | output=self.output_layer(atten_inputs)
365 |
366 | return output
367 |
368 |
--------------------------------------------------------------------------------
/kon/model/ctr_model/model/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # _*_ coding:utf-8 _*_
3 | '''=================================
4 | @Author :tix_hjq
5 | @Date :2020/5/29 下午4:01
6 | @File :__init__.py.py
7 | ================================='''
--------------------------------------------------------------------------------
/kon/model/cvr_model/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # _*_ coding:utf-8 _*_
3 | '''=================================
4 | @Author :tix_hjq
5 | @Date :2020/9/26 下午6:23
6 | @File :__init__.py.py
7 | @email :hjq1922451756@gmail.com or 1922451756@qq.com
8 | ================================='''
9 | from sklearn.model_selection import KFold, StratifiedKFold
10 | from sklearn.metrics import mean_squared_error as mse
11 | from sklearn.preprocessing import LabelEncoder
12 | from sklearn.metrics import f1_score, r2_score
13 | from hyperopt import fmin, tpe, hp, partial
14 | from numpy.random import random, shuffle
15 | import matplotlib.pyplot as plt
16 | from pandas import DataFrame
17 | import tensorflow as tf
18 | from tqdm import tqdm
19 | from PIL import Image
20 | import lightgbm as lgb
21 | import networkx as nx
22 | import pandas as pd
23 | import numpy as np
24 | import warnings
25 | import cv2
26 | import os
27 | import gc
28 | import re
29 | import datetime
30 | import sys
31 | from kon.model.embedding.setence_model import *
32 | from kon.model.feature_eng.feature_transform import feature_tool
33 | from kon.model.feature_eng.base_model import base_model
34 | from kon.model.ctr_model.model.models import *
--------------------------------------------------------------------------------
/kon/model/cvr_model/layer/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # _*_ coding:utf-8 _*_
3 | '''=================================
4 | @Author :tix_hjq
5 | @Date :2020/9/26 下午6:24
6 | @File :__init__.py.py
7 | @email :hjq1922451756@gmail.com or 1922451756@qq.com
8 | ================================='''
9 | from sklearn.model_selection import KFold, StratifiedKFold
10 | from sklearn.metrics import mean_squared_error as mse
11 | from sklearn.preprocessing import LabelEncoder
12 | from sklearn.metrics import f1_score, r2_score
13 | from hyperopt import fmin, tpe, hp, partial
14 | from numpy.random import random, shuffle
15 | import matplotlib.pyplot as plt
16 | from pandas import DataFrame
17 | import tensorflow as tf
18 | from tqdm import tqdm
19 | from PIL import Image
20 | import lightgbm as lgb
21 | import networkx as nx
22 | import pandas as pd
23 | import numpy as np
24 | import warnings
25 | import cv2
26 | import os
27 | import gc
28 | import re
29 | import datetime
30 | import sys
31 | from kon.model.embedding.setence_model import *
32 | from kon.model.feature_eng.feature_transform import feature_tool
33 | from kon.model.feature_eng.base_model import base_model
34 | from kon.model.ctr_model.model.models import *
--------------------------------------------------------------------------------
/kon/model/cvr_model/model/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # _*_ coding:utf-8 _*_
3 | '''=================================
4 | @Author :tix_hjq
5 | @Date :2020/9/26 下午6:24
6 | @File :__init__.py.py
7 | @email :hjq1922451756@gmail.com or 1922451756@qq.com
8 | ================================='''
9 | from sklearn.model_selection import KFold, StratifiedKFold
10 | from sklearn.metrics import mean_squared_error as mse
11 | from sklearn.preprocessing import LabelEncoder
12 | from sklearn.metrics import f1_score, r2_score
13 | from hyperopt import fmin, tpe, hp, partial
14 | from numpy.random import random, shuffle
15 | import matplotlib.pyplot as plt
16 | from pandas import DataFrame
17 | import tensorflow as tf
18 | from tqdm import tqdm
19 | from PIL import Image
20 | import lightgbm as lgb
21 | import networkx as nx
22 | import pandas as pd
23 | import numpy as np
24 | import warnings
25 | import cv2
26 | import os
27 | import gc
28 | import re
29 | import datetime
30 | import sys
31 | from kon.model.embedding.setence_model import *
32 | from kon.model.feature_eng.feature_transform import feature_tool
33 | from kon.model.feature_eng.base_model import base_model
34 | from kon.model.ctr_model.model.models import *
--------------------------------------------------------------------------------
/kon/model/embedding/README.md:
--------------------------------------------------------------------------------
1 | You may notice this part looks a lot like 浅梦's work: it dates from when I first got into this area and was written by following his code.
2 | It will be refactored in the future, since my understanding back then was somewhat fuzzy. This part mainly covers graph-embedding methods, and more GNN-style models will be covered later.
3 |
4 | 
--------------------------------------------------------------------------------
/kon/model/embedding/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # _*_ coding:utf-8 _*_
3 | '''=================================
4 | @Author :tix_hjq
5 | @Date :2020/5/29 下午4:01
6 | @File :__init__.py.py
7 | ================================='''
8 |
--------------------------------------------------------------------------------
/kon/model/embedding/logs/0/best_weights.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/logs/0/best_weights.h5
--------------------------------------------------------------------------------
/kon/model/embedding/logs/0/events.out.tfevents.1564644409.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/logs/0/events.out.tfevents.1564644409.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/logs/0/events.out.tfevents.1565180032.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/logs/0/events.out.tfevents.1565180032.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/logs/0/events.out.tfevents.1565180080.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/logs/0/events.out.tfevents.1565180080.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/model_test.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | from kon.model.embedding.setence_model.deepwalk import DeepWalk
3 | from kon.model.embedding.setence_model.line import Line
4 | from kon.model.embedding.setence_model.node2vec import node2vec
5 | from kon.model.embedding.setence_model.sdne import sdne
6 | from kon.model.embedding.util.util_tool import read_graph
7 | from kon.model.embedding.util.evaluate import evaluate_tools
8 |
9 | def deep_walk_run(edgelist_path,is_evaluate=False):
10 | Graph = read_graph(edgelist_path)
11 |
12 | deepwalk = DeepWalk(
13 | Graph=Graph,
14 | per_vertex=80,
15 | walk_length=10,
16 | window_size=5,
17 | dimension_size=8,
18 | work=4
19 | )
20 | embeddings = deepwalk.transform()
21 | if is_evaluate:
22 | eval_tool = evaluate_tools(embeddings=embeddings, label_path='wiki/Wiki_labels.txt')
23 | eval_tool.plot_embeddings()
24 | return embeddings
25 |
26 | def line_run():
27 | from kon.model.embedding.util.util_tool import read_graph
28 | import os
29 | print(os.getcwd())
30 | Graph = read_graph('wiki/Wiki_edgelist.txt')
31 | line = Line(
32 | Graph=Graph,
33 | dimension_size=128,
34 | per_vertex=100,
35 | walk_length=10,
36 | window_size=5,
37 | work=1,
38 | negative_ratio=1,
39 | batch_size=128,
40 | log_dir='logs/0/',
41 | epoch=100,
42 | )
43 | embeddings = line.transform()
44 | from kon.model.embedding.util.evaluate import evaluate_tools
45 | tool = evaluate_tools(embeddings, label_path='wiki/Wiki_labels.txt')
46 | tool.plot_embeddings()
47 |
48 | def node2vec_run():
49 | Graph = read_graph('wiki/Wiki_edgelist.txt')
50 |
51 | node_vec = node2vec(
52 | Graph=Graph,
53 | per_vertex=80,
54 | walk_length=10,
55 | window_size=5,
56 | dimension_size=128,
57 | work=1,
58 | p=0.25,
59 | q=4
60 | )
61 |
62 | embeddings = node_vec.transform()
63 | eval_tool = evaluate_tools(embeddings, label_path='wiki/Wiki_labels.txt')
64 | eval_tool.plot_embeddings()
65 |
66 | def sdne_run():
67 | Graph = read_graph('wiki/Wiki_edgelist.txt')
68 | sdne_model = sdne(
69 | Graph=Graph,
70 | dimension_size=128,
71 | per_vertex=100,
72 | walk_length=10,
73 | window_size=5,
74 | work=1,
75 | beta=5,
76 | alpha=1e-6,
77 | verbose=1,
78 | epochs=1000,
79 | batch_size=512,
80 | log_dir='logs/0/',
81 | hidden_size_list=[256, 128],
82 | l1=1e-5,
83 | l2=1e-4
84 | )
85 | sdne_model.train()
86 | embeddings = sdne_model.get_embeddings()
87 |
88 | from kon.model.embedding.util.evaluate import evaluate_tools
89 | eval_tool = evaluate_tools(embeddings, label_path='wiki/Wiki_labels.txt')
90 | eval_tool.plot_embeddings()
91 |
92 |
93 | def model_test(build_name,edgelist_path='wiki/Wiki_edgelist.txt',embedding=8):
94 | if build_name=='deepwalk':
95 | embedding=deep_walk_run(edgelist_path)
96 | elif build_name=='line':
97 | line_run()
98 | elif build_name=='node2vec':
99 | node2vec_run()
100 | elif build_name=='sdne':
101 | sdne_run()
102 | elif build_name=='all':
103 | deep_walk_run(edgelist_path)
104 | line_run()
105 | node2vec_run()
106 | sdne_run()
107 |
108 | return embedding
109 |
110 | if __name__=='__main__':
111 | model_test('deepwalk')
112 |
113 |
114 |
--------------------------------------------------------------------------------
/kon/model/embedding/other/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # _*_ coding:utf-8 _*_
3 | '''=================================
4 | @Author :tix_hjq
5 | @Date :2020/5/29 下午4:01
6 | @File :__init__.py.py
7 | ================================='''
8 |
--------------------------------------------------------------------------------
/kon/model/embedding/other/other-collections.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 | from collections import namedtuple
4 | # namedtuple creates a tuple subclass with named fields
5 | Point=namedtuple('Point',['x','y'])
6 | p=Point(1,2)
7 | print(p.x)
8 | print(p.y)
9 |
10 |
11 | from collections import deque
12 |
13 | # deque implements a double-ended queue
14 | q=deque(['a','b','c'])
15 | q.append('x')
16 | q.appendleft('y')
17 | print(q)
18 | q.pop()
19 | q.popleft()
20 | print(q)
21 |
22 |
23 | from collections import defaultdict
24 |
25 | # defaultdict supplies a default value for missing keys
26 | dd=defaultdict(int)
27 | dd['key1']=dd['key1']+1
28 | print(dd.keys())
29 |
30 |
31 | from collections import OrderedDict
32 | # OrderedDict keeps keys in insertion order, though here it looks no different from a plain dict
33 | d = dict([('n', 1), ('b', 2), ('c', 3)])
34 | print(d)
35 |
36 | d=OrderedDict([('n',1), ('b', 2), ('c', 3)])
37 | print(d)
38 |
39 |
40 | from collections import Counter
41 | # Counter: a counting container
42 | c=Counter()
43 | for ch in 'programming':
44 | c[ch]=c[ch]+1
45 |
46 | print(c)
47 |
48 |
--------------------------------------------------------------------------------
/kon/model/embedding/other/other-networks.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | import networkx as nx
3 |
4 | # create a graph
5 | G=nx.Graph()
6 |
7 | # add nodes
8 | G.add_node(1)
9 | G.add_nodes_from([2,3])
10 |
11 | # add one graph's nodes into another
12 | # H=nx.path_graph(10)
13 | # G.add_nodes_from(H)
14 |
15 | # add edges
16 | G.add_edge(1,2)
17 | e=(2,3)
18 | G.add_edge(*e)
19 | # G.add_edges_from(H.edges)
20 |
21 | import matplotlib.pyplot as plt
22 | # G=nx.petersen_graph()
23 | nx.draw(G,with_labels=True,font_weight='bold')
24 | plt.show()
25 |
26 | # directed graph
27 | DG=nx.DiGraph()
28 | DG.add_weighted_edges_from([(1,2,0.5),(3,1,0.75)])
29 | print(DG.out_degree(1,weight='weight'))
30 |
31 |
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # _*_ coding:utf-8 _*_
3 | '''=================================
4 | @Author :tix_hjq
5 | @Date :2020/7/21 上午9:00
6 | @File :__init__.py.py
7 | @email :hjq1922451756@gmail.com or 1922451756@qq.com
8 | ================================='''
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/backone_language_model.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | from gensim.models import Word2Vec
3 |
4 | class language_model():
5 | def __init__(self,window_size,dimension_size,work):
6 | self.crop_size=int(window_size)
7 | self.unit_size=dimension_size
8 | self.workers=work
9 |
10 | def word2vec_on_train(self,sentence):
11 | model=Word2Vec(
12 | sentences=sentence,
13 | window=self.crop_size,
14 | size=self.unit_size,
15 | sg=1,
16 | hs=0,
17 | workers=self.workers,
18 | iter=3,
19 | min_count=0
20 | )
21 |
22 | return model
23 |
24 |
25 |
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/backone_optimize.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | import numpy as np
3 | from numpy import random
4 |
5 | class optimize_funcation():
6 |
7 | def __init__(self):
8 | pass
9 |
10 | def generate_alias_table(self, all_probability):
11 | num_probability=len(all_probability)
12 |
13 | all_probability=list((np.array(all_probability)*num_probability)/np.sum(all_probability))
14 |
15 |
16 | small, large = [], []
17 | prab, alias = [-1] * num_probability, [-1] * num_probability
18 |
19 | format_count=0
20 | for prob_rank in range(num_probability):
21 | if all_probability[prob_rank] == 1:
22 | prab[prob_rank] = 1
23 | alias[prob_rank] = -1
24 | format_count+=1
25 | elif all_probability[prob_rank] > 1:
26 | large.append(prob_rank)
27 | else:
28 | small.append(prob_rank)
29 |
30 | if format_count==num_probability:
31 | return prab,alias
32 |
33 | while small and large:
38 | small_rank = small.pop()
39 | small_data = all_probability[small_rank]
40 | need_data = 1 - small_data
41 | large_rank = large.pop()
42 | rest_data = all_probability[large_rank] - need_data
43 |
44 | prab[small_rank] = small_data
45 | alias[small_rank] = large_rank
46 | all_probability[large_rank]=rest_data
47 |
48 | if rest_data == 1:
49 | prab[large_rank] = 1
50 | alias[large_rank] = -1
51 |
52 | elif rest_data > 1:
53 | large.append(large_rank)
54 | else:
55 | small.append(large_rank)
56 |
57 | while len(small)!=0:
58 | small_rank=small.pop()
59 | prab[small_rank]=1
60 | while len(large)!=0:
61 | large_rank=large.pop()
62 | prab[large_rank]=1
63 |
64 | return prab, alias
65 |
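# Informal note on the invariant built above: bucket i keeps its own outcome
# with probability prab[i] and defers to outcome alias[i] otherwise, so one
# uniform bucket choice plus one biased coin flip samples the original
# distribution in O(1).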
66 | def alias_sample(self, prab, alias,rank=None):
67 | if rank is None:
68 | rank=int(random.random()*len(prab))
69 | prab_ = random.random()
70 | if prab_ < prab[rank]:
71 |
72 | return rank
73 | else:
74 | return alias[rank]
75 | def batch_alias_sample(self,prab,alias,rank_list):
76 | all_index=[]
77 | for rank in rank_list:
78 | index=self.alias_sample(prab,alias,rank)
79 | all_index.append(index)
80 | return all_index
81 |
82 | # kon
83 | def gen_prob_dist(self,N):
84 | p = np.random.randint(0, 100, N)
85 | return p / np.sum(p)
86 |
87 | def simulate(self,N=100, k=10000):
88 |
89 | truth = self.gen_prob_dist(N)
90 |
91 | area_ratio = truth * N
92 | prab, alias = self.generate_alias_table(all_probability=area_ratio)
93 |
94 | ans = np.zeros(N)
95 | for _ in range(k):
96 | i = self.alias_sample(alias=alias, prab=prab)  # draw a random bucket; passing rank=_ would overrun prab once _ >= N
97 |
98 | ans[i] += 1
99 |
100 |
101 | return ans / np.sum(ans), truth
102 |
103 | if __name__=='__main__':
104 | tool=optimize_funcation()
105 | tool.simulate()
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/deepwalk.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | from numpy import random
3 | from kon.model.embedding.setence_model.walk_core_model import core_model
4 | from kon.model.embedding.util.evaluate import evaluate_tools
5 | from tqdm import tqdm
6 | from kon.model.embedding.util.util_tool import read_graph
7 |
8 | class DeepWalk(core_model):
9 |
10 | def __init__(self,Graph,per_vertex,walk_length,window_size,dimension_size,work):
11 | super().__init__(Graph,per_vertex,walk_length,window_size,dimension_size,work)
12 |
13 | def deepwalk(self):
14 | sentence_list=[]
15 |
16 | for num in tqdm(range(self.walk_epoch),desc='walk epoch'):
17 | random.shuffle(self.all_nodes)
18 | for vertex in tqdm(self.all_nodes,desc='generate node walk seqs'):
19 | sentence_list.append(self.random_walk(start_vertex=vertex))
20 |
21 | return sentence_list
22 |
23 | def transform(self):
24 | sentence_list=self.deepwalk()
25 | embeddings=self.embdding_train(sentence_list)
26 | return embeddings
27 |
28 |
29 | if __name__=='__main__':
30 |
31 | Graph = read_graph('wiki/Wiki_edgelist.txt')
32 |
33 | deepwalk=DeepWalk(
34 | Graph=Graph,
35 | per_vertex=80,
36 | walk_length=10,
37 | window_size=5,
38 | dimension_size=64,
39 | work=4
40 | )
41 |
42 | embeddings=deepwalk.transform()
43 | print(embeddings.keys())
44 | print(embeddings.values())
45 | eval = evaluate_tools(embeddings=embeddings)
46 | eval.plot_embeddings()
47 |
48 |
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/line.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | from kon.model.embedding.setence_model.walk_core_model import core_model
3 | from kon.model.embedding.util.util_tool import get_node_information
4 | from numpy import random
5 | import numpy as np
6 | import math
7 |
8 | class Line(core_model):
9 |
10 | def __init__(self,Graph,per_vertex,walk_length,window_size,dimension_size,work,log_dir,epoch,negative_ratio=0,order='second',batch_size=1024,times=1):
11 | super().__init__(Graph,per_vertex,walk_length,window_size,dimension_size,work)
12 | self.times=times
13 | self.epoch=epoch
14 | self.log_dir=log_dir
15 | self.batch_size=batch_size
16 | self.order=order
17 | self.negative_ratio=negative_ratio
18 | self.idx2node,self.node2idx=get_node_information(self.all_nodes)
19 | self.generate_sampling_table()
20 |
21 | def generate_edge_sampling_table(self):
22 | # edge sampling: guards against the exploding gradients that the paper attributes to large weight gaps
23 | numEdges = self.numEdges
24 |
25 | edges_sum = 0
26 | for edge in self.all_edges:
27 | edges_sum += self.G[edge[0]][edge[1]].get('weight', 1.0)
28 |
29 | # look up every edge's weight
30 | all_probability = []
31 | for edge in self.all_edges:
32 | probability = self.G[edge[0]][edge[1]].get('weight', 1.0) * numEdges / edges_sum
33 | all_probability.append(probability)
34 |
35 | self.edge_prab, self.edge_alias = self.optimize_fun.generate_alias_table(all_probability)
36 |
37 | def generate_node_sampling_table(self,power=0.75):
38 | node_degree = np.zeros(self.numNodes)
39 |
40 | # vertex sampling with the empirical power=0.75; following the paper, a vertex's importance (cf. PageRank) is taken to be its weighted out-degree
41 | # accumulate each vertex's weighted out-degree
42 | for edge in self.all_edges:
43 | node_degree[self.node2idx[edge[0]]] += self.G[edge[0]][edge[1]].get('weight', 1.0)
44 |
45 | # damp each out-degree with the power exponent
46 | weights_sum=0
47 | for rank in range(self.numNodes):
48 | weights_sum+=math.pow(node_degree[rank],power)
49 |
50 | # compute each vertex's sampling probability
51 | all_probability=[]
52 | for rank in range(self.numNodes):
53 | probability=float(math.pow(node_degree[rank],power))/weights_sum
54 | all_probability.append(probability)
55 |
56 | self.node_prab, self.node_alias = self.optimize_fun.generate_alias_table(all_probability)
57 |
58 | # build the tables required by alias sampling
59 | def generate_sampling_table(self,power=0.75):
60 | self.generate_node_sampling_table(power)
61 | self.generate_edge_sampling_table()
62 |
63 | def generator_positive_data(self,data_index,start_index,end_index,edges_index):
64 | rank_list=[]
65 | for rank in range(start_index,end_index):
66 | rank_list.append(data_index[rank])
67 |
68 | edge_index_list_ = self.optimize_fun.batch_alias_sample(
69 | prab=self.edge_prab,
70 | alias=self.edge_alias,
71 | rank_list=rank_list
72 | )
73 |
74 | begin_node,end_node=[],[]
75 |
76 | for edge_index_ in edge_index_list_:
77 | begin_node.append(edges_index[edge_index_][0])
78 | end_node.append(edges_index[edge_index_][1])
79 |
80 | return begin_node,end_node
81 |
82 | def generator_negative_data(self,begin_node):
83 | rank_list = []
84 | for i in range(len(begin_node)):
85 | rank_list.append(random.choice(list(range(len(self.node_prab)))))
86 |
87 | end_node = self.optimize_fun.batch_alias_sample(
88 | prab=self.node_prab,
89 | alias=self.node_alias,
90 | rank_list=rank_list
91 | )
92 |
93 | return begin_node,end_node
94 |
95 | def generator_data(self):
96 | #edges_index:(begin_node,end_node)
97 | edges_index = []
98 | for edge in self.all_edges:
99 | edge_index = (self.node2idx[edge[0]], self.node2idx[edge[1]])
100 | edges_index.append(edge_index)
101 |
102 | #data_index:index of (edge_index)
103 | data_size=self.numEdges
104 | data_index=list(range(data_size))
105 | random.shuffle(data_index)
106 |
107 | begin_node=[]
108 | start_index=0
109 | end_index=min(start_index+self.batch_size,data_size)
110 |
111 | # controls alternation between positive and negative batches
112 | mod=0
113 | # batches per cycle: 1 positive + negative_ratio negative
114 | mod_size=1+self.negative_ratio
115 |
116 | while True:
117 | if mod==0:
118 | begin_node,end_node=self.generator_positive_data(data_index,start_index,end_index,edges_index)
119 | sign=np.ones(len(begin_node))
120 |
121 | else:
122 | begin_node,end_node=self.generator_negative_data(begin_node)
123 | sign=np.ones(len(begin_node))*-1
124 |
125 | if self.order == 'all':
126 | yield ([np.array(begin_node), np.array(end_node)], [sign, sign])
127 | else:
128 | yield ([np.array(begin_node), np.array(end_node)], [sign])
129 |
130 | # controls how many negative batches are generated
131 | mod+=1
132 | mod%=mod_size
133 |
134 | if mod==0:
135 | start_index = end_index
136 | end_index = min(start_index + self.batch_size, data_size)
137 |
138 | if start_index>=data_size:
139 | mod=0
140 | begin_node=[]
141 | random.shuffle(data_index)
142 | start_index=0
143 | end_index=min(start_index+self.batch_size,data_size)
144 |
145 | def train(self):
146 | model=self.creat_line_model()
147 | model.fit_generator(
148 | self.generator_data(),
149 | steps_per_epoch=((self.numEdges*(1+self.negative_ratio)-1)//self.batch_size+1)*self.times,
150 | verbose=1,
151 | epochs=self.epoch,
152 | callbacks=self.model_prepare(self.log_dir)
153 | )
154 |
155 | def get_embedding(self):
156 | self.embeddings={}
157 | if self.order=='first':
158 | embeddings=self.embedding_dict['first'].get_weights()[0]
159 | elif self.order=='second':
160 | embeddings=self.embedding_dict['second'].get_weights()[0]
161 | else:
162 | embeddings = np.hstack((self.embedding_dict['first'].get_weights()[
163 | 0], self.embedding_dict['second'].get_weights()[0]))
164 | idx2node = self.idx2node
165 | for i, embedding in enumerate(embeddings):
166 | self.embeddings[idx2node[i]] = embedding
167 |
168 | return self.embeddings
169 |
170 | def transform(self):
171 | self.train()
172 | self.get_embedding()
173 | return self.embeddings
174 |
175 |
176 | if __name__=='__main__':
177 | from util_tool import read_graph
178 | Graph=read_graph('model/embedding/wiki/Wiki_edgelist.txt')
179 | line=Line(
180 | Graph=Graph,
181 | dimension_size=128,
182 | per_vertex=100,
183 | walk_length=10,
184 | window_size=5,
185 | work=1,
186 | negative_ratio=1,
187 | batch_size=128,
188 | log_dir='model/embedding/setence_model/logs/0/',
189 | epoch=100,
190 | )
191 | embeddings=line.transform()
192 | from evaluate import evaluate_tools
193 | tool=evaluate_tools(embeddings)
194 | tool.plot_embeddings()
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/best_weights.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/best_weights.h5
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565011299.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565011299.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565011324.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565011324.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565011336.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565011336.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565013918.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565013918.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565013943.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565013943.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565013958.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565013958.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565013985.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565013985.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014029.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014029.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014060.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014060.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014368.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014368.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014404.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014404.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014481.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014481.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014728.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014728.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014760.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014760.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014805.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014805.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565015151.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565015151.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565015263.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565015263.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565015277.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565015277.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565015308.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565015308.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565057550.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565057550.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565058087.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565058087.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565058252.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565058252.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565058261.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565058261.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565058653.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565058653.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565058673.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565058673.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565058702.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565058702.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565059234.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565059234.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565059587.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565059587.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565059681.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565059681.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565059708.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565059708.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565059726.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565059726.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565059768.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565059768.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565059787.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565059787.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565060677.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565060677.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565060761.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565060761.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565060853.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565060853.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565069889.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565069889.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565069922.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565069922.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565069970.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565069970.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070262.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070262.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070318.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070318.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070526.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070526.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070581.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070581.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070607.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070607.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070688.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070688.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070826.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070826.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070867.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070867.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070932.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070932.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070966.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070966.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070986.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070986.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565071024.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565071024.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565162850.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565162850.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565165341.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565165341.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565168457.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565168457.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565170961.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565170961.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565173560.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565173560.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565173578.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565173578.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565173609.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565173609.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565173761.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565173761.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174061.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174061.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174117.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174117.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174191.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174191.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174253.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174253.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174276.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174276.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174293.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174293.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174349.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174349.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174378.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174378.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565179687.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565179687.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565182503.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565182503.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565182554.dream-System:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565182554.dream-System
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577277074.hjq-Precision-T7610.19721.672.v2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577277074.hjq-Precision-T7610.19721.672.v2
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577277074.hjq-Precision-T7610.profile-empty:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577277074.hjq-Precision-T7610.profile-empty
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577277128.hjq-Precision-T7610.20083.106.v2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577277128.hjq-Precision-T7610.20083.106.v2
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577277719.hjq-Precision-T7610.20872.106.v2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577277719.hjq-Precision-T7610.20872.106.v2
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577277787.hjq-Precision-T7610.21065.672.v2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577277787.hjq-Precision-T7610.21065.672.v2
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577278233.hjq-Precision-T7610.21443.672.v2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577278233.hjq-Precision-T7610.21443.672.v2
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577278349.hjq-Precision-T7610.21613.672.v2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577278349.hjq-Precision-T7610.21613.672.v2
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577278745.hjq-Precision-T7610.22262.672.v2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577278745.hjq-Precision-T7610.22262.672.v2
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577279268.hjq-Precision-T7610.22939.672.v2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577279268.hjq-Precision-T7610.22939.672.v2
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577279585.hjq-Precision-T7610.2711.672.v2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577279585.hjq-Precision-T7610.2711.672.v2
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577280012.hjq-Precision-T7610.3191.672.v2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577280012.hjq-Precision-T7610.3191.672.v2
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_20-31-14/local.trace:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_20-31-14/local.trace
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_20-32-08/local.trace:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_20-32-08/local.trace
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_20-41-59/local.trace:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_20-41-59/local.trace
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_20-43-07/local.trace:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_20-43-07/local.trace
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_20-50-33/local.trace:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_20-50-33/local.trace
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_20-52-29/local.trace:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_20-52-29/local.trace
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_20-59-05/local.trace:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_20-59-05/local.trace
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_21-07-48/local.trace:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_21-07-48/local.trace
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_21-13-05/local.trace:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_21-13-05/local.trace
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_21-20-12/local.trace:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_21-20-12/local.trace
--------------------------------------------------------------------------------
/kon/model/embedding/setence_model/node2vec.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | from kon.model.embedding.setence_model.walk_core_model import core_model
3 | from kon.model.embedding.util.util_tool import read_graph
4 | from kon.model.embedding.util.evaluate import evaluate_tools
5 | from numpy import random
6 |
7 | class node2vec(core_model):
8 |
9 | def __init__(self,Graph,per_vertex,walk_length,window_size,dimension_size,work,p,q):
10 | super().__init__(Graph,per_vertex,walk_length,window_size,dimension_size,work)
11 | self.p=p
12 | self.q=q
13 |
14 | def Learn_Feature(self):
15 | self.Preprocess_Modified_Weights(self.p,self.q)
16 | sentence_list=[]
17 | for num in range(self.walk_epoch):
18 | random.shuffle(self.all_nodes)
19 | for node in self.all_nodes:
20 | sentence=self.random_walk(node,is_edge_sampling=True)
21 | sentence_list.append(sentence)
22 |
23 | return sentence_list
24 |
25 | def transform(self):
26 | sentence_list=self.Learn_Feature()
27 | embeddings=self.embdding_train(sentence_list)
28 |
29 | return embeddings
30 |
31 |
32 | if __name__=='__main__':
33 | Graph = read_graph('wiki/Wiki_edgelist.txt')
34 |
35 | node_vec= node2vec(
36 | Graph=Graph,
37 | per_vertex=80,
38 | walk_length=10,
39 | window_size=5,
40 | dimension_size=128,
41 | work=1,
42 | p=0.25,
43 | q=4
44 | )
45 |
46 | embeddings=node_vec.transform()
47 | eval_tool=evaluate_tools(embeddings)
48 | eval_tool.plot_embeddings()
49 |
--------------------------------------------------------------------------------
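A quick numeric check of the p/q bias that Learn_Feature relies on (a minimal sketch on a toy graph; the graph and values are illustrative, not taken from the repo). It mirrors the branch logic of unnormalized_transition_probability in walk_core_model.py below:

# Score each neighbor x of v after a step t -> v:
# x == t -> w/p, x adjacent to t -> w, otherwise w/q.
import networkx as nx

G = nx.Graph()
G.add_edges_from([('t', 'v'), ('t', 'a'), ('v', 'a'), ('v', 'b')])

p, q = 0.25, 4.0   # same values as the __main__ above
t, v = 't', 'v'
unnormalized = []
for x in G.neighbors(v):
    w = G[v][x].get('weight', 1.0)
    if x == t:
        unnormalized.append(w / p)   # step back to the previous vertex
    elif G.has_edge(x, t):
        unnormalized.append(w)       # common neighbor of t and v
    else:
        unnormalized.append(w / q)   # move further away from t
norm = sum(unnormalized)
print([round(u / norm, 3) for u in unnormalized])
# -> [0.762, 0.19, 0.048] for neighbors (t, a, b): p<1 and q>1 keep the walk local.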
/kon/model/embedding/setence_model/sdne.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | from kon.model.embedding.setence_model.walk_core_model import core_model
3 | from kon.model.embedding.util.util_tool import read_graph,get_node_information
4 | import numpy as np
5 |
6 | class sdne(core_model):
7 |
8 | def __init__(self, Graph, per_vertex, walk_length, window_size, dimension_size, work,alpha,beta,epochs,batch_size,verbose,hidden_size_list,l1,l2,log_dir):
9 | super().__init__(Graph, per_vertex, walk_length, window_size, dimension_size, work)
10 | self.alpha=alpha
11 | self.beta=beta
12 | self.batch_size=batch_size
13 | self.epochs=epochs
14 | self.verbose=verbose
15 | self.log_dir=log_dir
16 | self.pred_all_nodes=self.all_nodes
17 | self.idx2node, self.node2idx = get_node_information(self.pred_all_nodes)
18 | self.W,self.W_ = self.generator_adjacency_matrix(self.pred_all_nodes)
19 | self.L=self.generator_L(self.W_)
20 | self.model,self.embedding_model=self.creat_model(hidden_size_list=hidden_size_list,l1=l1,l2=l2)
21 |
22 | def generator_adjacency_matrix(self,all_nodes):
23 | numNodes=len(all_nodes)
24 | W=np.zeros((numNodes,numNodes))
25 | W_=np.zeros((numNodes,numNodes))
26 |
27 | for start_vertex in all_nodes:
28 | start_rank=self.node2idx[start_vertex]
29 | for end_vertex in list(self.G.neighbors(start_vertex)):
30 | end_rank=self.node2idx[end_vertex]
31 | weight=self.G[start_vertex][end_vertex].get('weight',1.0)
32 | W[start_rank][end_rank]=weight
33 | W_[start_rank][end_rank]=weight
34 | W_[end_rank][start_rank]=weight
35 |
36 | return W,W_
37 |
38 | def generator_L(self,W_):
39 | D = np.zeros_like(W_)
40 |
41 | for i in range(len(W_)):
42 | D[i][i] = np.sum(W_[i])
43 | L = D - W_
44 |
45 | return L
46 |
47 | def generator_data(self):
48 | all_nodes=self.pred_all_nodes
49 | start_rank=0
50 | end_rank=min(self.batch_size,self.numNodes)
51 |
52 | while True:
53 | batch_nodes=all_nodes[start_rank:end_rank]
54 | node_index_list=[self.node2idx[node] for node in batch_nodes]
55 |
56 | batch_W=self.W[node_index_list,:]
57 | batch_L=self.L[node_index_list][:,node_index_list]
58 |
59 | input_=[batch_W,batch_L]
60 |
61 | yield (input_,input_)
62 |
63 | start_rank = end_rank
64 | end_rank += self.batch_size
65 | end_rank = min(end_rank, self.numNodes)
66 |
67 | if end_rank==self.numNodes:
68 | start_rank=0
69 | end_rank=min(self.batch_size,self.numNodes)
70 | np.random.shuffle(all_nodes)
71 |
72 | def train(self):
73 | self.model.compile('adam',[self.second_nd(self.beta),self.first_nd(self.alpha)])
74 | self.model.fit_generator(
75 | self.generator_data(),
76 | steps_per_epoch=self.numNodes//self.batch_size,
77 | epochs=self.epochs,
78 | callbacks=self.model_prepare(self.log_dir),
79 | verbose=self.verbose
80 | )
81 | return self.model
82 |
83 | def get_embeddings(self):
84 | embeddings={}
85 | pred_embeddings=self.embedding_model.predict(self.W,batch_size=self.batch_size)
86 |
87 | rank=0
88 | for embedding in pred_embeddings:
89 | embeddings[self.idx2node[rank]]=embedding
90 | rank+=1
91 | return embeddings
92 |
93 | if __name__=='__main__':
94 | Graph=read_graph()
95 | sdne_model=sdne(
96 | Graph=Graph,
97 | dimension_size=128,
98 | per_vertex=100,
99 | walk_length=10,
100 | window_size=5,
101 | work=1,
102 | beta=5,
103 | alpha=1e-6,
104 | verbose=1,
105 | epochs=1000,
106 | batch_size=512,
107 | log_dir='model/embedding/setence_model/logs/0/',
108 | hidden_size_list=[256, 128],
109 | l1=1e-5,
110 | l2=1e-4
111 | )
112 |
113 | sdne_model.train()
114 | embeddings=sdne_model.get_embeddings()
115 |
116 | from kon.model.embedding.util.evaluate import evaluate_tools
117 | eval_tool=evaluate_tools(embeddings)
118 | eval_tool.plot_embeddings()
119 |
120 |
121 |
--------------------------------------------------------------------------------
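The alpha/beta arguments above feed sdne's two loss terms (first_nd/second_nd in walk_core_model.py below). A minimal numpy sketch of the second-order term on one toy adjacency row (values illustrative, not from the repo):

import numpy as np

beta = 5.0                                 # as in the __main__ above
y_true = np.array([[0., 1., 0., 1.]])      # one row of the adjacency matrix W
y_pred = np.array([[0.1, 0.8, 0.2, 0.6]])  # its autoencoder reconstruction

b = np.ones_like(y_true)
b[y_true != 0] = beta                      # penalize edge entries beta times harder
loss = np.mean(np.sum(np.square((y_true - y_pred) * b), axis=-1))
print(loss)  # 5.05 -- the two nonzero (edge) entries dominate the penalty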
/kon/model/embedding/setence_model/walk_core_model.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | import tensorflow as tf
3 |
4 | from keras.layers import Embedding,Input,Lambda,Dense
5 | from keras import backend as K
6 | from keras.optimizers import Adam
7 | from keras.callbacks import ReduceLROnPlateau,TensorBoard,EarlyStopping,ModelCheckpoint
8 | from keras.regularizers import l1_l2
9 | from keras import Model
10 | from numpy import random
11 | from kon.model.embedding.setence_model.backone_language_model import language_model
12 | from kon.model.embedding.setence_model.backone_optimize import optimize_funcation
13 | import numpy as np
14 |
15 | class core_model(object):
16 |
17 | def __init__(self,Graph,per_vertex,walk_length,window_size,dimension_size,work):
18 | self.G=Graph
19 | self.walk_epoch=per_vertex
20 | self.sentence_len=walk_length
21 | self.all_nodes=list(Graph.nodes())
22 | self.all_edges=list(Graph.edges())
23 | self.numEdges=Graph.number_of_edges()
24 | self.numNodes=Graph.number_of_nodes()
25 | self.dimension_size=dimension_size
26 | self.backone_model = language_model(
27 | dimension_size=dimension_size,
28 | window_size=window_size,
29 | work=work
30 | )
31 | self.optimize_fun=optimize_funcation()
32 |
33 | #node2vec dfs/bfs controller
34 | def unnormalized_transition_probability(self,t,v,p,q):
35 | '''
36 | :param v: current vertex of the walk
37 | :param t: previous vertex of the walk
38 | : x: candidate next vertex
39 | : x=t:d(tx)=0,1/p
40 | : t-x=1:d(tx)=1,1
41 | : else:d(tx)=2,1/q
42 | :return :edge_alias_table
43 | sampling weights:p(d(tx))*edge_weight
44 | '''
45 |
46 | unnormalized_probs=[]
47 |
48 | for x in self.G.neighbors(v):
49 | weight=self.G[v][x].get('weight',1.0)
50 | if x==t:
51 | unnormalized_probs.append(weight/p)
52 | elif self.G.has_edge(x,t):
53 | unnormalized_probs.append(weight)
54 | else:
55 | unnormalized_probs.append(weight/q)
56 | norm_sum=sum(unnormalized_probs)
57 | all_probs=[float(un_prob)/norm_sum for un_prob in unnormalized_probs]
58 |
59 | edge_sample_table=self.optimize_fun.generate_alias_table(all_probs)
60 |
61 | return edge_sample_table
62 |
63 | def Preprocess_Modified_Weights(self,p,q):
64 | alias_nodes={}
65 |
66 | count=0
67 | for node in self.all_nodes:
68 | unnormalized_probs=[]
69 | for neighbor in self.G.neighbors(node):
70 | weight=self.G[node][neighbor].get('weight',1.0)
71 | unnormalized_probs.append(weight)
72 |
73 | norm_sum=sum(unnormalized_probs)
74 | all_probs=[float(un_probs)/norm_sum for un_probs in unnormalized_probs]
75 | alias_nodes[node]=self.optimize_fun.generate_alias_table(all_probability=all_probs)
76 |
77 | count+=1
78 |
79 | alias_edges={}
80 |
81 | for edge in self.all_edges:
82 | alias_edges[edge]=self.unnormalized_transition_probability(edge[0],edge[1],p,q)
83 |
84 | self.alias_nodes=alias_nodes
85 | self.alias_edges=alias_edges
86 |
87 |
88 | #deepwalk,node2vec core
89 | def random_walk(self,start_vertex,is_edge_sampling=False):
90 | node_sentence=[start_vertex]
91 | now_walk_len=1
92 |
93 | while now_walk_len<self.sentence_len:
94 | now_node=node_sentence[-1]
95 | neighborhood_list=list(self.G.neighbors(now_node))
96 | if len(neighborhood_list)>0:
97 | if not is_edge_sampling:
98 | next_node=random.choice(neighborhood_list)
99 | node_sentence.append(next_node)
100 | else:
101 | if len(node_sentence)==1:
102 | next_node_rank=self.optimize_fun.alias_sample(prab=self.alias_nodes[now_node][0],alias=self.alias_nodes[now_node][1])
103 | next_node=neighborhood_list[next_node_rank]
104 | node_sentence.append(next_node)
105 | else:
106 | pre_node=node_sentence[-2]
107 | edge=(pre_node,now_node)
108 | next_node_rank=self.optimize_fun.alias_sample(self.alias_edges[edge][0],alias=self.alias_edges[edge][1])
109 | next_node=neighborhood_list[next_node_rank]
110 | node_sentence.append(next_node)
111 | now_walk_len+=1
112 | else:
113 | break
114 |
115 | return node_sentence
116 |
117 | #line_core
118 | def line_loss(self,y_true,y_pred):
119 | #second-order training adds negative samples whose labels carry a -1 sign, so both orders can share this loss
120 | return -K.mean(K.log(K.sigmoid(y_true*y_pred)))
121 |
122 | def creat_line_model(self,order='second',lr=0.001):
123 | v_i = Input(shape=(1,))
124 | v_j = Input(shape=(1,))
125 |
126 | first_emb = Embedding(self.numNodes, self.dimension_size, name='first_emb')
127 | second_emb = Embedding(self.numNodes, self.dimension_size, name='second_emb')
128 | context_emb = Embedding(self.numNodes, self.dimension_size, name='context_emb')
129 |
130 | v_i_emb = first_emb(v_i)
131 | v_j_emb = first_emb(v_j)
132 |
133 | v_i_emb_second = second_emb(v_i)
134 | v_j_context_emb = context_emb(v_j)
135 |
136 | first = Lambda(lambda x: tf.reduce_sum(
137 | x[0] * x[1], axis=-1), name='first_order')([v_i_emb, v_j_emb])
138 | second = Lambda(lambda x: tf.reduce_sum(
139 | x[0] * x[1], axis=-1), name='second_order')([v_i_emb_second, v_j_context_emb])
140 |
141 | if order == 'first':
142 | output_list = [first]
143 | elif order == 'second':
144 | output_list = [second]
145 | else:
146 | output_list = [first, second]
147 |
148 | model = Model(inputs=[v_i, v_j], outputs=output_list)
149 |
150 | adam=Adam(lr=lr)
151 | model.compile(optimizer=adam,loss=self.line_loss)
152 |
153 | self.embedding_dict = {'first': first_emb, 'second': second_emb}
154 |
155 | return model
156 |
157 | #sdne
158 | def first_nd(self, alpha):
159 | def first_loss(y_true, y_pred):
160 | loss = 2 * alpha * tf.linalg.trace(tf.matmul(tf.matmul(y_pred, y_true, transpose_a=True), y_pred))
161 | return loss / tf.to_float(K.shape(y_pred)[0])
162 |
163 | return first_loss
164 |
165 | def second_nd(self, beta):
166 | def second_loss(y_true, y_pred):
167 | b_ = np.ones_like(y_true)  # numpy op on y_true; assumes it behaves like an array (eager mode)
168 | b_[y_true != 0] = beta
169 | loss = K.sum(K.square((y_true - y_pred) * b_), axis=-1)
170 | return K.mean(loss)
171 |
172 | return second_loss
173 |
174 | def encoder(self, x, hidden_size_list, l1, l2):
175 | for i in range(len(hidden_size_list) - 1):
176 | x = Dense(units=hidden_size_list[i], activation='relu', kernel_regularizer=l1_l2(l1, l2))(x)
177 | y = Dense(units=hidden_size_list[-1], activation='relu', kernel_regularizer=l1_l2(l1, l2), name='encode')(x)
178 |
179 | return y
180 |
181 | def decoder(self, y, hidden_size_list, l1, l2):
182 | for i in reversed(range(len(hidden_size_list) - 1)):
183 | y = Dense(units=hidden_size_list[i], activation='relu', kernel_regularizer=l1_l2(l1, l2))(y)
184 | x = Dense(units=self.numNodes, activation='relu', name='decode')(y)
185 |
186 | return x
187 |
188 | def creat_model(self, hidden_size_list, l1, l2):
189 | adjacency_matrix = Input(shape=(self.numNodes,))
190 | L = Input(shape=(None,))
191 | x = adjacency_matrix
192 |
193 | y = self.encoder(x, hidden_size_list, l1, l2)
194 | x_ = self.decoder(y, hidden_size_list, l1, l2)
195 |
196 | model = Model(inputs=[adjacency_matrix, L], outputs=[x_, y])
197 | emb = Model(inputs=adjacency_matrix, outputs=y)
198 |
199 | return model,emb
200 |
201 |
202 | #callback
203 | def model_prepare(self,log_dir):
204 | tensorboard=TensorBoard(log_dir=log_dir)
205 |
206 | checkpoint=ModelCheckpoint(
207 | log_dir+'best_weights.h5',
208 | monitor='loss',
209 | save_best_only=True,
210 | save_weights_only=True,
211 | verbose=1,
212 | period=1
213 | )
214 |
215 | earlystop=EarlyStopping(
216 | monitor='loss',
217 | patience=50
218 | )
219 |
220 | reduce_lr=ReduceLROnPlateau(
221 | monitor='loss',
222 | patience=1,
223 | factor=0.1
224 | )
225 |
226 | callback_list=[tensorboard,checkpoint,earlystop,reduce_lr]
227 | return callback_list
228 |
229 | #language model (negative-sampling skip-gram)
230 | def embdding_train(self,sentence_list):
231 |
232 | print('begin train embedding')
233 | print('loading...')
234 |
235 | model=self.backone_model.word2vec_on_train(sentence_list)
236 |
237 | print('train ending')
238 |
239 | embeddings={}
240 | for node in self.all_nodes:
241 | embeddings[node]=model.wv[node]
242 |
243 | return embeddings
244 |
--------------------------------------------------------------------------------
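random_walk and Preprocess_Modified_Weights lean on generate_alias_table/alias_sample from backone_optimize.py, which this section does not reproduce. A self-contained sketch of the standard alias method those calls presumably implement (names mirror the call sites above; O(n) table build, O(1) per draw):

import random

def generate_alias_table(all_probability):
    """Build (prab, alias) tables for Vose's alias method."""
    n = len(all_probability)
    prab, alias = [0.0] * n, [0] * n
    scaled = [p * n for p in all_probability]
    small = [i for i, s in enumerate(scaled) if s < 1.0]
    large = [i for i, s in enumerate(scaled) if s >= 1.0]
    while small and large:
        s, l = small.pop(), large.pop()
        prab[s], alias[s] = scaled[s], l
        scaled[l] -= 1.0 - scaled[s]          # donor l fills the rest of bucket s
        (small if scaled[l] < 1.0 else large).append(l)
    for i in small + large:                   # leftovers are (numerically) full buckets
        prab[i] = 1.0
    return prab, alias

def alias_sample(prab, alias):
    """Draw an index in O(1): pick a bucket, keep it or jump to its alias."""
    i = random.randrange(len(prab))
    return i if random.random() < prab[i] else alias[i]

prab, alias = generate_alias_table([0.7, 0.2, 0.1])
counts = [0, 0, 0]
for _ in range(100000):
    counts[alias_sample(prab, alias)] += 1
print(counts)  # ~[70000, 20000, 10000]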
/kon/model/embedding/util/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # _*_ coding:utf-8 _*_
3 | '''=================================
4 | @Author :tix_hjq
5 | @Date :2020/7/21 9:00 AM
6 | @File :__init__.py
7 | @email :hjq1922451756@gmail.com or 1922451756@qq.com
8 | ================================='''
--------------------------------------------------------------------------------
/kon/model/embedding/util/evaluate.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | import numpy as np
3 | from sklearn.manifold import TSNE
4 | from kon.model.embedding.util.util_tool import read_node_label
5 |
6 | import matplotlib.pyplot as plt
7 |
8 | class evaluate_tools():
9 | def __init__(self,embeddings,label_path='../wiki/Wiki_labels.txt'):
10 | self.data=embeddings
11 | self.X,self.y=read_node_label(label_path)
12 |
13 | # =read_label(label_path)
14 |
15 | def plot_embeddings(self):
16 | emb_list = []
17 | for k in self.X:
18 | emb_list.append(self.data[k])
19 | emb_list = np.array(emb_list)
20 | print(emb_list)
21 |
22 | model = TSNE(n_components=2)
23 | node_pos = model.fit_transform(emb_list)
24 |
25 |
26 | color_idx = {}
27 | for i in range(len(self.X)):
28 | color_idx.setdefault(self.y[i][0], [])
29 | color_idx[self.y[i][0]].append(i)
30 |
31 | for c, idx in color_idx.items():
32 | plt.scatter(node_pos[idx, 0], node_pos[idx, 1], label=c)
33 | plt.legend()
34 | plt.show()
35 |
--------------------------------------------------------------------------------
/kon/model/embedding/util/test.txt:
--------------------------------------------------------------------------------
1 | 1 2
2 | 4 3
3 | 5 6
4 | 2 3
5 | 2 1
6 | 3 5
7 | 1 2
8 |
--------------------------------------------------------------------------------
/kon/model/embedding/util/util_tool.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | import os
3 | import networkx as nx
4 | import pandas as pd
5 |
6 |
7 | def get_node_information(all_nodes):
8 | node2idx = {}
9 | idx2node = []
10 | node_size = 0
11 | for node in all_nodes:
12 | node2idx[node] = node_size
13 | idx2node.append(node)
14 | node_size += 1
15 | idx2node = idx2node
16 | node2idx = node2idx
17 | return idx2node, node2idx
18 |
19 | def save_edgelist(edgelist_list,save_path):
20 | if os.path.exists(save_path):
21 | os.remove(save_path)
22 |
23 | file=open(save_path,mode='a+')
24 | for edgelist in edgelist_list:
25 | file.writelines(edgelist)
26 |
27 | def read_graph(edgelist_path='../wiki/Wiki_edgelist.txt'):
28 | DG=nx.read_edgelist(
29 | edgelist_path,
30 | create_using=nx.DiGraph(),
31 | nodetype=None,
32 | data=[('weight',int)]
33 | )
34 |
35 | return DG
36 |
37 | def read_node_label(filename, skip_head=False):
38 | fin = open(filename, 'r')
39 | X = []
40 | Y = []
41 | while 1:
42 | if skip_head:
43 | fin.readline()
44 | l = fin.readline()
45 | if l == '':
46 | break
47 | vec = l.strip().split(' ')
48 | X.append(vec[0])
49 | Y.append(vec[1:])
50 | fin.close()
51 | return X, Y
52 |
53 | def read_label(label_path):
54 | data=pd.read_csv(label_path,header=None,sep=' ')
55 | nodes=data[0].tolist()
56 | label=data[1].tolist()
57 |
58 | return nodes,label
59 |
60 | if __name__=='__main__':
61 | pass
62 | # edgelist_list=['1 2\n','4 3\n','5 6\n','2 3\n','2 1\n','3 5\n','1 2\n']
63 | # save_path='kon.txt'
64 | # save_edgelist(edgelist_list,save_path)
65 | # read_graph(save_path)
66 |
67 |
--------------------------------------------------------------------------------
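A quick usage sketch of get_node_information as defined above (toy node list; assumes the package is importable from the repo root):

from kon.model.embedding.util.util_tool import get_node_information

idx2node, node2idx = get_node_information(['a', 'b', 'c'])
print(idx2node)  # ['a', 'b', 'c']
print(node2idx)  # {'a': 0, 'b': 1, 'c': 2}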
/kon/model/feature_eng/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # _*_ coding:utf-8 _*_
3 | '''=================================
4 | @Author :tix_hjq
5 | @Date :2020/6/24 12:15 AM
6 | @File :__init__.py
7 | @email :hjq1922451756@gmail.com or 1922451756@qq.com
8 | ================================='''
--------------------------------------------------------------------------------
/kon/model/feature_eng/base_model.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | '''=================================
3 | @Author :tix_hjq
4 | @Date :19-10-30 9:36 PM
5 | ================================='''
6 | from sklearn.model_selection import KFold, StratifiedKFold
7 | from sklearn.metrics import mean_squared_error as mse
8 | from sklearn.metrics import f1_score, r2_score
9 | from numpy.random import random, shuffle
10 | import matplotlib.pyplot as plt
11 | from pandas import DataFrame
12 | from tqdm import tqdm
13 | import lightgbm as lgb
14 | import pandas as pd
15 | import numpy as np
16 | import warnings
17 | import os
18 | import gc
19 | import re
20 | import datetime
21 | import sys
22 |
23 | warnings.filterwarnings("ignore")
24 |
25 | pd.set_option('display.max_columns', None)
26 | pd.set_option('display.max_rows', None)
27 | pd.set_option('max_colwidth', 100)
28 |
29 | print(os.getcwd())
30 | #----------------------------------------------------
31 | class base_model():
32 | def __init__(self,save_folder,random_state=2048):
33 | print('base model is backend')
34 | self.random_state=random_state
35 | self.save_folder=save_folder
36 |
37 | def model_fit(self,X_train,y_train,cate_fea,X_vail,y_vail,is_pred=True,test_data=None,loss=['cross_entropy','binary'],is_classiffy=True,threshold=0.103):
38 | if is_classiffy:
39 | loss=loss[0]
40 | else:
41 | loss=loss[1]
42 |
43 | lgb_model = lgb.LGBMRegressor(
44 | num_leaves=40, reg_alpha=1, reg_lambda=0.1, objective=loss,
45 | max_depth=-1, learning_rate=0.05, min_child_samples=5, random_state=self.random_state,
46 | n_estimators=8000, subsample=0.8, colsample_bytree=0.8,is_unbalance=True,
47 | device='gpu'
48 | # n_jobs=-1
49 | )
50 |
51 | lgb_model.fit(X_train,y_train,eval_set=[(X_vail,y_vail)],eval_metric='auc',
52 | categorical_feature=cate_fea,
53 | early_stopping_rounds=300,verbose=10)
54 |
55 | result_weight=lgb_model.best_score_['valid_0']['auc']
56 | # result_weight=lgb_model.best_score_['training']['binary_logloss']
57 |
58 | model_import = DataFrame()
59 | model_import['feature'] = X_train.columns.tolist()
60 | model_import['feature_importance'] = lgb_model.feature_importances_
61 | model_import['model_weight'] = result_weight
62 | model_import.sort_values(by=['feature_importance'], ascending=False, inplace=True)
63 | zero_fea_list = model_import[model_import['feature_importance'] != 0]['feature'].tolist()  # note: despite the name, keeps non-zero-importance features
64 |
65 | print(model_import.head())
66 | print('-------------------------------')
67 |
68 | if is_classiffy:
69 | vail_y_pred = lgb_model.predict(X_vail, num_iteration=lgb_model.best_iteration_)
70 | vail_result = DataFrame(data=vail_y_pred, columns=['vail_pred'])
71 | vail_result['y_vail'] = y_vail
72 | vail_result.sort_values(['vail_pred'], ascending=False, inplace=True)
73 | vail_result.reset_index(inplace=True)
74 |
75 | del vail_result['index']
76 | gc.collect()
77 |
78 | vail_result.loc[vail_result.index <= int(vail_result.shape[0] * threshold), 'vail_pred'] = 1
79 | vail_result.loc[vail_result.vail_pred != 1, 'vail_pred'] = 0
80 | print(vail_result.head())
81 | try:
82 | print(f1_score(y_pred=vail_result['vail_pred'].tolist(),y_true=vail_result['y_vail'].tolist()))
83 | except ValueError:
84 | print('ERROR')
85 | del vail_result
86 |
87 | if is_pred==True:
88 | result_data = np.array(lgb_model.predict(test_data, num_iteration=lgb_model.best_iteration_ + 10))
89 | result_=DataFrame(columns=['result'],data=result_data)
90 | result_['weight']=result_weight
91 | return result_,zero_fea_list,model_import
92 | return zero_fea_list,model_import
93 |
94 |
95 |
96 | def avg_model_pred(self,result_data,n_split,test_data,is_plot=True,is_avg=True):
97 | print(result_data.head())
98 |
99 | # cal weight_avg_result
100 | result_cols = []
101 | weight_cols = []
102 | for i in range(0, n_split):
103 | result_cols.append('result_' + str(i))
104 | weight_cols.append('weight_' + str(i))
105 |
106 | result_data['result'] = 0
107 |
108 | for w_col, r_col in zip(weight_cols, result_cols):
109 | if not is_avg:
110 | result_data[w_col] /= result_data['weight']
111 | else:
112 | result_data[w_col]=1/n_split
113 | print(result_data[w_col].head())
114 | result_data[r_col] *= result_data[w_col]
115 |
116 | for col in result_cols:
117 | result_data['result'] += result_data[col]
118 |
119 | score = result_data['weight'].unique().tolist()[0] / n_split
120 |
121 | submit_data = DataFrame()
122 | submit_data['ID'] = test_data.ID.tolist()
123 | submit_result = []
124 |
125 | for r in result_data.result:
126 | if r <= 0:
127 | submit_result.append(0.1)
128 | else:
129 | submit_result.append(r)
130 | submit_data['Label'] = submit_result
131 |
132 | del result_data
133 | gc.collect()
134 |
135 | print('model_score:{}'.format(score))
136 |
137 | if is_plot:
138 | data = DataFrame(submit_data.Label.value_counts()).reset_index()
139 | plt.bar(data['index'], data['Label'])
140 |
141 | return submit_data,score
142 |
143 |
144 | def n_fold_fit(self,train_data,cols,cate_col,test_data=None,label_col='Label',is_pred=True):
145 | #train by k_fold
146 | result_data=DataFrame()
147 | if is_pred:
148 | result_data['weight']=[0]*test_data.shape[0]
149 | fea_filter =[]
150 | n_split=10
151 | rank=0
152 |
153 | k=StratifiedKFold(n_splits=n_split,random_state=self.random_state,shuffle=True)
154 |
155 | all_feature_important=DataFrame()
156 | all_feature_important['feature']=cols
157 | for train_idx,test_idx in tqdm(k.split(train_data[cols],train_data[label_col]),desc='k_split_fitting'):
158 | X_train=train_data[cols].loc[train_idx]
159 | X_vail=train_data[cols].loc[test_idx]
160 |
161 | y_train=train_data[[label_col]].loc[train_idx]
162 | y_vail=train_data[[label_col]].loc[test_idx]
163 |
164 | if is_pred:
165 | result_,zero_fea,feature_important=self.model_fit(X_train=X_train,y_train=y_train,X_vail=X_vail,y_vail=y_vail,test_data=test_data[cols],cate_fea=cate_col,is_pred=is_pred)
166 | result_data['result_'+str(rank)]=result_['result']
167 | result_data['weight_'+str(rank)]=result_['weight']
168 | result_data['weight']+=result_['weight']
169 | del result_
170 | gc.collect()
171 |
172 | if not is_pred:
173 | zero_fea,feature_important=self.model_fit(X_train=X_train,y_train=y_train,X_vail=X_vail,y_vail=y_vail,cate_fea=cate_col,is_pred=is_pred)
174 |
175 | feature_important.columns=['feature']+[str(col)+'_'+str(rank) for col in feature_important.columns.tolist()[1:]]
176 | all_feature_important=all_feature_important.merge(feature_important,'left',on=['feature'])
177 | fea_filter.append(zero_fea)
178 | rank+=1
179 |
180 | np.save(self.save_folder+'zero_feature',fea_filter)
181 |
182 | return result_data,n_split,all_feature_important,fea_filter
183 |
184 | def save_feature_submit(self,submit_data,score,cols,cate_fea):
185 | cate_fea_label = []
186 | for col in cols:
187 | if col in cate_fea:
188 | cate_fea_label.append(1)
189 | else:
190 | cate_fea_label.append(0)
191 |
192 | model_features = DataFrame()
193 | model_features['cols'] = cols
194 | model_features['is_cate'] = cate_fea_label
195 |
196 | feature_path = self.save_folder+'use_feature/' + str(datetime.datetime.now().date()) + '/'
197 | result_path = self.save_folder+ 'result/' + str(datetime.datetime.now().date()) + '/'
198 |
199 | for path in [feature_path, result_path]:
200 | if not os.path.exists(path):
201 |                 os.makedirs(path)  # makedirs: the parent use_feature/ or result/ dir may not exist yet
202 |
203 | model_features.to_csv(
204 | feature_path + 'model_feature_' + str(datetime.datetime.now()) + '_' + str(score) + '.csv', index=None)
205 |
206 |         submit_data.drop(columns=['weight'], inplace=True, errors='ignore')  # 'weight' only exists on the single-fit path
207 | submit_data.to_csv(result_path + 'submit_' + str(datetime.datetime.now()) + '_' + str(score) + '.csv',
208 | index=None)
209 |
210 | def fit_transform(self,train_data:DataFrame,use_cols,cate_cols,label_col:str,test_data=None,is_pred=True):
211 | if is_pred:
212 | result_data,n_split,feature_important,zero_fea=self.n_fold_fit(train_data=train_data,test_data=test_data,label_col=label_col,cols=use_cols,cate_col=cate_cols,is_pred=is_pred)
213 | submit_data,score=self.avg_model_pred(result_data=result_data,n_split=n_split,test_data=test_data)
214 | self.save_feature_submit(score=score,submit_data=submit_data,cate_fea=cate_cols,cols=use_cols)
215 | else:
216 | result_data,n_split,feature_important,zero_fea=self.n_fold_fit(train_data=train_data,label_col=label_col,is_pred=is_pred,cols=use_cols,cate_col=cate_cols)
217 | return feature_important,zero_fea
218 |
219 | def single_fit_transform(self,X_train,y_train,X_vail,y_vail,cate_cols,test_data,pred_id,is_classiffy=True,threshold=0.103):
220 | result_, zero_fea_list, model_import=self.model_fit(X_train,y_train,cate_cols,X_vail,y_vail,test_data=test_data,is_classiffy=is_classiffy,threshold=threshold)
221 | result_['id']=pred_id
222 | score=result_['weight'].unique().tolist()[0]
223 | cols=X_train.columns.tolist()
224 | self.save_feature_submit(score=score, submit_data=result_, cate_fea=cate_cols, cols=cols)
225 |
226 | if __name__=='__main__':
227 | data_folder ='../../data/'
228 | submit_data_folder = data_folder + 'submit_data/'
229 |
230 | from sklearn.datasets import load_iris
231 | iris = load_iris()
232 | train_data=iris.data
233 | target_data=iris.target
234 | train_fea=iris.feature_names
235 | train_data=DataFrame(data=train_data,columns=train_fea)
236 | target_data=DataFrame(data=target_data,columns=['target'])
237 | from sklearn.model_selection import train_test_split
238 | X_train,X_test,y_train,y_test=train_test_split(train_data,target_data,test_size=0.3,random_state=2048)
239 | base_model(submit_data_folder).single_fit_transform(X_train,y_train,X_test,y_test,cate_cols=[],test_data=train_data,pred_id=train_data.index.tolist(),is_classiffy=False,threshold=0.103)
--------------------------------------------------------------------------------
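A compact, self-contained sketch of the fold blending that avg_model_pred performs, run on hypothetical fold outputs (three folds, two test rows; the column layout matches what n_fold_fit accumulates):

from pandas import DataFrame

n_split = 3
result_data = DataFrame({
    'result_0': [0.2, 0.8], 'result_1': [0.3, 0.7], 'result_2': [0.1, 0.9],
    'weight_0': [0.6, 0.6], 'weight_1': [0.3, 0.3], 'weight_2': [0.9, 0.9],
})
# n_fold_fit sums the per-fold validation scores into a single 'weight' column
result_data['weight'] = sum(result_data['weight_%d' % i] for i in range(n_split))

result_data['result'] = 0
for i in range(n_split):
    # is_avg=False branch: each fold contributes its share of the total score;
    # is_avg=True replaces this with a flat 1 / n_split
    w = result_data['weight_%d' % i] / result_data['weight']
    result_data['result'] += result_data['result_%d' % i] * w
print(result_data['result'])  # blended predictions, one per test row
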
/kon/utils/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # _*_ coding:utf-8 _*_
3 | '''=================================
4 | @Author :tix_hjq
5 | @Date :2020/5/29 4:03 PM
6 | @File :__init__.py
7 | ================================='''
8 |
9 |
--------------------------------------------------------------------------------
/kon/utils/data_prepare.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | '''=================================
3 | @Author :tix_hjq
4 | @Date :2020/5/2 5:14 PM
5 | @File :data_prepare.py
6 | ================================='''
7 | from sklearn.preprocessing import LabelEncoder
8 | from numpy.random import random, shuffle
9 | from pandas import DataFrame
10 | import tensorflow as tf
11 | from sklearn.preprocessing import MinMaxScaler
12 | import pandas as pd
13 | import numpy as np
14 | import warnings
15 | import os
16 | from collections import namedtuple
17 |
18 | from kon.model.ctr_model.layer.interactive_layer.interactive_layer import SparseEmbed
19 | from kon.model.feature_eng.feature_transform import feature_tool
20 | from kon.model.feature_eng.base_model import base_model
21 | import multiprocessing as mp
22 |
23 | warnings.filterwarnings("ignore")
24 | pd.set_option('display.max_columns', None)
25 | pd.set_option('display.max_rows', None)
26 | pd.set_option('max_colwidth', 100)
27 |
28 | print(os.getcwd())
29 | #----------------------------------------------------
30 | data_folder = '../../data/'
31 | origin_data_folder = data_folder + 'origin_data/'
32 | submit_data_folder = data_folder + 'submit_data/'
33 | eda_data_folder = data_folder + 'eda_data/'
34 | fea_data_folder = data_folder + 'fea_data/'
35 | #-----------------------------------------------------------------
36 | model_tool = base_model(submit_data_folder)
37 | fea_tool = feature_tool(fea_data_folder)
38 | #-----------------------------------------------------------------
39 | class InputFeature(object):
40 | def __init__(self,denseInfo:list=None,sparseInfo:list=None,seqInfo:list=None,denseInputs:list=None,sparseInputs:list=None,seqInputs:list=None,linearEmbed:list=None,sparseEmbed:list=None,seqEmbedList:list=None):
41 | self.dense_info=denseInfo
42 | self.sparse_info=sparseInfo
43 | self.seq_info=seqInfo
44 | self.dense_inputs=denseInputs
45 | self.sparse_inputs=sparseInputs
46 | self.seq_inputs=seqInputs
47 | self.linear_embed=linearEmbed
48 | self.sparse_embed=sparseEmbed
49 | self.seq_embed_list=seqEmbedList
50 |
51 | def toList(self):
52 | from pprint import pprint
53 | pprint([self.dense_info,self.sparse_info,self.seq_info,self.dense_inputs,self.sparse_inputs,
54 | self.seq_inputs,self.linear_embed,self.sparse_embed,self.seq_embed_list])
55 |
56 | class data_prepare(object):
57 | def __init__(self,batch_size=None,use_shuffle=True,cpu_core=None):
58 |         print('data_prepare backend ready')
59 | self.sparseFea=namedtuple('sparseFea',['fea_name','word_size','input_dim','cross_unit','linear_unit','pre_weight','mask_zero','is_trainable','input_length','sample_num','batch_size','emb_reg'])
60 | self.denseFea=namedtuple('denseFea',['fea_name','batch_size'])
61 | self.batch_size=batch_size
62 | self.use_shuffle=use_shuffle
63 |         self.cpu_core=mp.cpu_count() if cpu_core is None else cpu_core
64 |
65 | def FeatureInput(self,sparseInfo:list=None, denseInfo:list=None, seqInfo=None,useLinear:bool=False,useAddLinear:bool=False,useFlattenLinear:bool=False,useFlattenSparse:bool=False):
66 |
67 | linearEmbed,sparseEmbed,seqEmbed, seqMask=None,None,None,None
68 | [denseInputs, sparseInputs, seqInputs] =self.df_prepare(sparseInfo=sparseInfo, denseInfo=denseInfo,seqInfo=seqInfo)
69 | if useLinear:
70 | linearEmbed = SparseEmbed(sparseInfo, use_flatten=useFlattenLinear, is_linear=True,use_add=useAddLinear)(sparseInputs)
71 | if sparseInputs:
72 | sparseEmbed = SparseEmbed(sparseInfo, use_flatten=useFlattenSparse)(sparseInputs)
73 | if seqInputs:
74 | seqEmbed,seqMask= SparseEmbed(seqInfo,support_masking=True,mask_zero=True,is_linear=False,use_flatten=False)(seqInputs)
75 |
76 | return InputFeature(denseInfo,sparseInfo,seqInfo,denseInputs,sparseInputs,seqInputs,linearEmbed,sparseEmbed,[seqEmbed,seqMask])
77 |
78 | def concat_test_train(self, train_df: DataFrame, test_df: DataFrame):
79 | train_idx = train_df.index.tolist()
80 | test_idx = list(np.array(test_df.index) + train_idx[-1] + 1)
81 | df = pd.concat([train_df, test_df], ignore_index=True)
82 |
83 | return df, (train_idx, test_idx)
84 |
85 | def sparse_fea_deal(self,sparseDf:DataFrame,embed_dim=8,linear_dim=1,pre_weight=None,emb_reg=None):
86 | if not pre_weight:
87 | pre_weight=[None]*sparseDf.shape[1]
88 | if not emb_reg:
89 | emb_reg=[1e-8]*sparseDf.shape[1]
90 |
91 | sparseDf = sparseDf.fillna('-1')
92 | for fea in sparseDf:
93 | sparseDf[fea]=LabelEncoder().fit_transform(sparseDf[fea].astype('str'))
94 |
95 | sparseInfo=[self.sparseFea(
96 | fea_name=fea, input_dim=sparseDf[fea].shape[0],
97 | cross_unit=embed_dim, linear_unit=linear_dim,word_size=sparseDf[fea].nunique(),
98 | pre_weight=weight_,input_length=1,is_trainable=True,mask_zero=False,sample_num=None,
99 | batch_size=self.batch_size,emb_reg=reg
100 | ) for fea,weight_,reg in zip(sparseDf,pre_weight,emb_reg)]
101 |
102 | return sparseDf,sparseInfo
103 |
104 | def single_seq_deal(self,seq_list, is_str_list=True,is_str=False,max_len=None,sample_num=None):
105 |         '''
106 |         :param is_str_list:
107 |             True  ==> input format "[[1,2,3],[1,2,3]]" (stringified list of lists)
108 |             False ==> input format [[1,2,3],[1,2,3]]
109 |         :param is_str:
110 |             True  ==> input format ['1,2','3'] (comma-joined strings,
111 |             split on ',' before tokenizing)
112 |         '''
113 | sample_seq={}
114 |
115 | if is_str_list:
116 | seq_list = fea_tool.batch_convert_list(seq_list)
117 | if is_str:
118 | seq_list = [str(i).split(',') for i in seq_list]
119 |
120 | w2str = [[str(j) for j in i] for i in seq_list]
121 | seq = [" ".join(i) for i in w2str]
122 |
123 | token = tf.keras.preprocessing.text.Tokenizer(lower=False, char_level=False, split=' ')
124 | token.fit_on_texts(seq)
125 | format_seq = token.texts_to_sequences(seq)
126 | format_seq = tf.keras.preprocessing.sequence.pad_sequences(format_seq, maxlen=max_len,value=0)
127 | seq_idx = token.word_index
128 |
129 | # if sample_num:
130 | # sample_seq=[[[label]+list(np.random.choice([i for i in seq if i!=label and i!=0],size=sample_num)) if label!=0 else []
131 | # for label in seq]for seq in format_seq]
132 |
133 | return (format_seq, seq_idx,sample_seq)
134 |
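    # A usage sketch (hypothetical toy input, not from the repo's examples):
    #   format_seq, seq_idx, _ = data_prepare().single_seq_deal(
    #       ['1,2,3', '4,5'], is_str_list=False, is_str=True, max_len=4)
    #   format_seq ==> zero-padded int matrix; seq_idx ==> token-to-index map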
135 |
136 | def hard_search(self,seqData:list,seqCate:list,targetCate:list)->(list,list):
137 |         '''
138 |         SIM hard search: keep only behaviors whose category matches the target category
139 |         :return: (reduced seq, reduced seq cate)
140 |         '''
141 | aimIdx=[[idx_ for idx_,cate_ in enumerate(cateList) if cate_==aimCate]
142 | for cateList, aimCate in zip(seqCate, targetCate)]
143 | aimList=np.array([[np.array(seq)[idx_],np.array(cate)[idx_]]
144 | if idx_!=[] else [[],[]] for seq,cate,idx_ in zip(seqData,seqCate,aimIdx)])
145 | seqData,seqCate=np.split(aimList,[1],axis=1)
146 |
147 | return seqData,seqCate
148 |
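    # A usage sketch (hypothetical behavior data):
    #   seq, cate = data_prepare().hard_search(
    #       seqData=[[10, 11, 12]], seqCate=[[1, 2, 1]], targetCate=[1])
    #   keeps items 10 and 12, whose category equals the target category 1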
149 |
150 | def seq_deal(self,seqDf,embedding_dim:list,max_len:list=None,is_str_list=True,is_str=False,mask_zero=True,is_trainable=True,pre_weight:list=None,sample_num=None,use_wrap=True,emb_reg=None):
151 |         '''
152 |         notice:
153 |             <1> seqDf:
154 |                 format ==> see single_seq_deal
155 |             <2> pre_weight:
156 |                 format ==> [[fea1_weight],[fea2_weight],...]
157 |             <3> sample_num:
158 |                 negative sampling requires len(seq) >= sample_num + 1
159 |                 e.g. sample_num=5 ==> 5 negative samples per position
160 |
161 |         :param max_len: seq max length (per feature)
162 |         :param embedding_dim: seq embed dim (per feature)
163 |         :param is_str_list: see single_seq_deal
164 |         :param is_str: see single_seq_deal
165 |         :param mask_zero: True ==> use masking
166 |         :param is_trainable: True ==> embedding weights are trainable
167 |         :param pre_weight: pre-trained embedding weights (e.g. w2v as backend)
168 |         :param use_wrap: True ==> wrap features as sparseFea namedtuples
169 |         :param emb_reg: per-feature l2 regularization for the embeddings
170 |         :return: seqDf,seqIdx,seqInfo
171 |         '''
172 |
173 | if not pre_weight:
174 | pre_weight=[None]*seqDf.shape[1]
175 | if not max_len:
176 | max_len=[None]*seqDf.shape[1]
177 | if not emb_reg:
178 | emb_reg=[1e-8]*seqDf.shape[1]
179 |
180 | seq_tuple={
181 | seq_fea:self.single_seq_deal(seqDf[seq_fea],is_str_list=is_str_list,is_str=is_str,max_len=len_,sample_num=sample_num)
182 | for seq_fea,len_ in zip(seqDf,max_len)}
183 | seqDf={key:seq_tuple[key][0] for key in seq_tuple}
184 | seqIdx = {key: seq_tuple[key][1] for key in seq_tuple}
185 |
186 | sample_seq = None
187 | if sample_num:
188 | sample_seq={key:[i[1:] for i in seq_tuple[key][2]] for key in seq_tuple}
189 | del seq_tuple
190 |
191 | seqInfo=None
192 | if use_wrap:
193 | seqDf,seqInfo=self.sparse_wrap(seqDf,seqIdx=seqIdx,embedding_dim=embedding_dim,max_len=max_len,mask_zero=mask_zero,is_trainable=is_trainable,pre_weight=pre_weight,sample_num=sample_num,emb_reg=emb_reg)
194 |
195 | return seqDf,seqIdx,seqInfo
196 |
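    # A usage sketch (toy frame of comma-joined histories; dp = data_prepare()):
    #   seq_df = DataFrame({'hist': ['1,2,3', '2,4', '5']})
    #   seq_df, seq_idx, seq_info = dp.seq_deal(seq_df, embedding_dim=[8],
    #       max_len=[4], is_str_list=False, is_str=True)
    #   seq_df['hist'] ==> padded int matrix; seq_info feeds FeatureInput above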
197 | def sparse_wrap(self,seqDf,embedding_dim:list,seqIdx=None,seqIdx_path=None,max_len:list=None,mask_zero=True,is_trainable=True,pre_weight:list=None,sample_num=None,emb_reg=None):
198 |
199 | if not pre_weight:
200 | pre_weight=[None]*seqDf.shape[1]
201 | if not max_len:
202 | max_len=[None]*seqDf.shape[1]
203 | if seqIdx_path:
204 | seqIdx = fea_tool.pickle_op(seqIdx_path, is_save=False)
205 |         if emb_reg is None:
206 | emb_reg=[1e-8]*seqDf.shape[1]
207 |
208 | seqInfo = [self.sparseFea(
209 | fea_name=seq_fea, word_size=len(seqIdx[seq_key].keys()) + 1, input_dim=seqDf[seq_fea].shape[0],
210 | cross_unit=embed_, linear_unit=1, pre_weight=weight_, mask_zero=mask_zero,
211 | is_trainable=is_trainable, input_length=max_, sample_num=sample_num,batch_size=self.batch_size,emb_reg=reg
212 | ) for seq_fea, seq_key, weight_, max_, embed_,reg in zip(seqDf, seqIdx, pre_weight, max_len, embedding_dim,emb_reg)]
213 |
214 | if not isinstance(seqDf,dict):
215 | seqDf={fea:np.array([[int(j) for j in i.split(',')]for i in seqDf[fea].values]) for fea in seqDf}
216 |
217 | return seqDf,seqInfo
218 |
219 | def generator_session(self,df, group_cols: list, item_cols: str,
220 | session_maxLen, use_check=False):
221 |         '''
222 |         :param df:
223 |             format:
224 |                 user_id time item
225 |                 1       1    1
226 |         :param group_cols:
227 |             format: list ==> [user, time]
228 |             [groupby key: user_id, groupby time key: session split time]
229 |         :param item_cols:
230 |             item column name
231 |         :param use_check:
232 |             True ==> print the session-size distribution (helps pick session_maxLen)
233 |         :return:
234 |             DataFrame ==> columns = user_id, session_list
235 |         '''
236 |
237 | if use_check:
238 | def need_(x):
239 | return len(x.tolist())
240 |
241 | print(df.groupby(group_cols)[item_cols].agg(need_).reset_index()[item_cols].value_counts().head(20))
242 | else:
243 | def session_seq(x):
244 | return ','.join(x.tolist())
245 |
246 | df = df.groupby(group_cols)[item_cols].agg(session_seq).reset_index().rename(
247 | columns={item_cols: '{}_session'.format(item_cols)})
248 | df['{}_session'.format(item_cols)] = [','.join([str(j) for j in i]) for i in
249 | tf.keras.preprocessing.sequence.pad_sequences(
250 | [i.split(',') for i in df['{}_session'.format(item_cols)]],
251 | maxlen=session_maxLen)]
252 | del df[group_cols[1]]
253 |
254 | return df
255 |
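    # A usage sketch: for a df with columns [user_id, day, item],
    #   generator_session(df, group_cols=['user_id', 'day'], item_cols='item',
    #                     session_maxLen=5)
    # returns one zero-padded, comma-joined item string per (user, day) session,
    # e.g. '0,0,3,1,2'.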
256 | def generator_seq(self,df, group_cols, item_cols, session_maxLen, session_maxNum, use_check=False):
257 |         '''
258 |         :param df: df with columns ['user_id', '{}_session']
259 |         :param group_cols: same as generator_session
260 |         :param item_cols: same as generator_session
261 |         :param session_maxLen: same as generator_session
262 |         :param session_maxNum: max sessions kept per user (pad/truncate to this)
263 |         :param use_check: same as generator_session
264 |         :return: df ==> one padded session sequence string per user
265 |         '''
266 | if use_check:
267 | def need_(x):
268 | return len(x.tolist())
269 |
270 |             print(df.groupby([group_cols[0]])['{}_session'.format(item_cols)].agg(
271 |                 need_).reset_index()['{}_session'.format(item_cols)].value_counts().head(20))
272 | else:
273 | def seq(x):
274 | use_list = x.tolist()
275 | if len(use_list) > session_maxNum:
276 | use_list = use_list[:session_maxNum]
277 | else:
278 | use_list += [','.join([str(i) for i in [0] * session_maxLen]) for i in
279 | range(session_maxNum - len(use_list))]
280 |
281 | need_list = ""
282 | for i in use_list:
283 | need_list += i + ','
284 | return need_list[:-1]
285 |
286 | df = df.groupby([group_cols[0]])['{}_session'.format(item_cols)].agg(seq).reset_index()
287 |
288 | return df
289 |
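    # A usage sketch: stacking the per-day sessions produced above,
    #   generator_seq(session_df, group_cols=['user_id', 'day'], item_cols='item',
    #                 session_maxLen=5, session_maxNum=3)
    # pads/truncates every user to exactly 3 sessions of length 5, joined into
    # one comma string of 15 ids.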
290 | def sparse_prepare(self, sparse_info: list):
291 | return [tf.keras.Input(batch_shape=(info_.batch_size,info_.input_length,),
292 | name=info_.fea_name) for info_ in sparse_info]
293 |
294 | def dense_fea_deal(self,denseDf:DataFrame,is_fillna=True):
295 | if is_fillna:
296 |             # fill missing values with each column's mode
297 |             denseDf = DataFrame({fea: denseDf[fea].fillna(denseDf[fea].mode()[0]) for fea in denseDf})
298 | denseDf[denseDf.columns.tolist()]=MinMaxScaler(feature_range=(0,1)).fit_transform(denseDf)
299 | denseInfo=[self.denseFea(fea,self.batch_size) for fea in denseDf]
300 |
301 | return denseDf,denseInfo
302 |
303 | def dense_prepare(self,dense_info:list):
304 | return [tf.keras.Input(batch_shape=(info_.batch_size,1,), name=info_.fea_name)for info_ in dense_info]
305 |
306 | def df_format(self,df:DataFrame):
307 | df_={}
308 | for fea in df:
309 | df_.update({fea:df[fea].values})
310 | return df_
311 |
312 | def df_format_input(self,df:list):
313 | df=pd.concat(df,axis=1)
314 | df=self.df_format(df)
315 | return df
316 |
317 | def df_prepare(self,sparseInfo:list=None,denseInfo:list=None,seqInfo:list=None):
318 | df_name=[]
319 | inputs=[[],[],[]]
320 | if denseInfo:
321 | dense_inputs=self.dense_prepare(denseInfo)
322 | df_name+=[info_.fea_name for info_ in denseInfo]
323 | inputs[0]=dense_inputs
324 | if sparseInfo:
325 | sparse_inputs=self.sparse_prepare(sparseInfo)
326 | df_name+=[info_.fea_name for info_ in sparseInfo]
327 | inputs[1]=sparse_inputs
328 | if seqInfo:
329 | seq_inputs=self.sparse_prepare(seqInfo)
330 | df_name+=[info_.fea_name for info_ in seqInfo]
331 | inputs[2]=seq_inputs
332 |
333 | return inputs
334 |
335 | def data_pipeline(self,dataSet:tuple):
336 | dataSet=tf.data.Dataset.from_tensor_slices(dataSet)
337 | return dataSet.shuffle(2048).repeat(2).batch(batch_size=self.batch_size).prefetch(2)
338 |
339 | def extract_train_test(self,train_idx, test_idx,targetDf,sparseDf=None, denseDf=None,seqDf=None,use_softmax=True):
340 | try:
341 | train_dense = denseDf.loc[train_idx]
342 | test_dense = denseDf.loc[test_idx]
343 | except AttributeError:
344 | train_dense,test_dense=None,None
345 |
346 | try:
347 | train_sparse = sparseDf.loc[train_idx]
348 | test_sparse = sparseDf.loc[test_idx]
349 | except AttributeError:
350 | train_sparse, test_sparse = None, None
351 |
352 | try:
353 | train_seq={key:seqDf[key][train_idx] for key in seqDf}
354 | test_seq={key:seqDf[key][test_idx] for key in seqDf}
355 | except TypeError:
356 | train_seq,test_seq= {}, {}
357 |
358 | if use_softmax:
359 | targetDf=tf.keras.utils.to_categorical(targetDf.values.tolist())
360 | else:
361 | targetDf=targetDf.values
362 |
363 | y_train=targetDf[train_idx]
364 | y_test=targetDf[test_idx]
365 |
366 | train_df=self.df_format_input([train_dense,train_sparse])
367 | test_df=self.df_format_input([test_dense,test_sparse])
368 | train_df.update(train_seq)
369 | test_df.update(test_seq)
370 |
371 | if self.batch_size!=None:
372 | train_df=self.static_batch(train_df)
373 | test_df=self.static_batch(test_df)
374 | y_train=self.static_batch(y_train)
375 | y_test=self.static_batch(y_test)
376 |
377 | train=self.data_pipeline((train_df,y_train))
378 | test=self.data_pipeline((test_df,y_test))
379 |
380 | return train,test
381 |
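    # A usage sketch: after concat_test_train + sparse/dense_fea_deal,
    #   train_ds, test_ds = dp.extract_train_test(train_idx, test_idx, targetDf=y,
    #                           sparseDf=sparse_df, denseDf=dense_df, use_softmax=True)
    # returns tf.data pipelines of ({fea_name: values}, one-hot target) batches.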
382 | def input_loc(self,df,use_idx:list):
383 | '''
384 | :param df: format df
385 | :param use_idx: use idx,e.g k-flods
386 | :return: df[use idx]
387 | '''
388 | if isinstance(df, dict):
389 | return {key: np.array(df[key])[use_idx] for key in df}
390 | else:
391 | return df[use_idx]
392 |
393 | def static_batch(self,df):
394 | if isinstance(df,dict):
395 | df_num=np.array(df[list(df.keys())[0]]).shape[0]
396 | else:
397 | df_num=len(df)
398 |
399 | batch_num = (df_num // self.batch_size) * self.batch_size
400 |         need_idx = np.random.choice(list(range(df_num)), size=batch_num, replace=False)  # no replacement: truncate to whole batches without duplicating rows
401 | if self.use_shuffle:
402 | shuffle(need_idx)
403 |
404 | df = self.input_loc(df, use_idx=need_idx)
405 |
406 | return df
407 |
408 | def split_val_set(self,train_df,y_train,train_index,val_index):
409 | train_x = self.input_loc(df=train_df, use_idx=train_index)
410 | train_y = self.input_loc(df=y_train, use_idx=train_index)
411 | val_x = self.input_loc(df=train_df, use_idx=val_index)
412 | val_y = self.input_loc(df=y_train, use_idx=val_index)
413 |
414 | return train_x,train_y,(val_x,val_y)
--------------------------------------------------------------------------------
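A minimal end-to-end sketch of the tabular path through data_prepare, on toy data (assumes the kon package is on PYTHONPATH; all column names are hypothetical):

from pandas import DataFrame
from kon.utils.data_prepare import data_prepare

dp = data_prepare(batch_size=4)

sparse_df = DataFrame({'city': ['a', 'b', 'a', None], 'device': ['x', 'y', 'x', 'x']})
dense_df = DataFrame({'price': [1.0, 2.5, None, 4.0]})

# label-encode + wrap each column as a sparseFea namedtuple
sparse_df, sparse_info = dp.sparse_fea_deal(sparse_df, embed_dim=8)
# mode-fill missing values + min-max scale to [0, 1]
dense_df, dense_info = dp.dense_fea_deal(dense_df)

# keras Input placeholders, grouped as [dense, sparse, seq]
dense_inputs, sparse_inputs, _ = dp.df_prepare(sparseInfo=sparse_info, denseInfo=dense_info)
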
/kon/wrapper/Feature_Columns.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # _*_ coding:utf-8 _*_
3 | '''=================================
4 | @Author :tix_hjq
5 | @Date :2020/9/30 7:56 PM
6 | @File :Feature_Columns.py
7 | @email :hjq1922451756@gmail.com or 1922451756@qq.com
8 | ================================='''
9 | from collections import namedtuple
10 |
11 | class NumsFeaWrapper(object):
12 | def __init__(self):
13 | self.denseFea=namedtuple('denseFea',['fea_name','batch_size'])
14 |
15 | class SparseFeaWrapper(object):
16 | def __init__(self):
17 | self.sparseFeature=namedtuple('sparseFea',
18 | ['fea_name', 'word_size', 'input_dim', 'cross_unit', 'linear_unit', 'pre_weight', 'mask_zero',
19 | 'is_trainable', 'input_length', 'sample_num', 'batch_size', 'emb_reg'])
20 |
21 | class NumsFea(NumsFeaWrapper):
22 | def __init__(self):
23 | super(NumsFea, self).__init__()
24 | self.fea=self.denseFea
25 |
26 | class CateFea(SparseFeaWrapper):
27 | def __init__(self):
28 | super(CateFea, self).__init__()
29 | self.fea=self.sparseFeature
30 |
31 | class BehaviorFea(SparseFeaWrapper):
32 | def __init__(self):
33 | super(BehaviorFea, self).__init__()
34 | self.fea = self.sparseFeature
35 |
36 |
37 |
38 |
39 |
40 |
--------------------------------------------------------------------------------
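A short sketch of instantiating these wrappers (field values below are hypothetical):

from kon.wrapper.Feature_Columns import NumsFea, CateFea

price = NumsFea().fea(fea_name='price', batch_size=None)
city = CateFea().fea(
    fea_name='city', word_size=100, input_dim=1000, cross_unit=8, linear_unit=1,
    pre_weight=None, mask_zero=False, is_trainable=True, input_length=1,
    sample_num=None, batch_size=None, emb_reg=1e-8)
print(price.fea_name, city.word_size)  # the namedtuples are consumed by SparseEmbed
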
/kon/wrapper/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # _*_ coding:utf-8 _*_
3 | '''=================================
4 | @Author :tix_hjq
5 | @Date :2020/9/30 7:55 PM
6 | @File :__init__.py
7 | @email :hjq1922451756@gmail.com or 1922451756@qq.com
8 | ================================='''
9 | from sklearn.model_selection import KFold, StratifiedKFold
10 | from sklearn.metrics import mean_squared_error as mse
11 | from sklearn.preprocessing import LabelEncoder
12 | from sklearn.metrics import f1_score, r2_score
13 | from hyperopt import fmin, tpe, hp, partial
14 | from numpy.random import random, shuffle
15 | import matplotlib.pyplot as plt
16 | from pandas import DataFrame
17 | import tensorflow as tf
18 | from tqdm import tqdm
19 | from PIL import Image
20 | import lightgbm as lgb
21 | import networkx as nx
22 | import pandas as pd
23 | import numpy as np
24 | import warnings
25 | import cv2
26 | import os
27 | import gc
28 | import re
29 | import datetime
30 | import sys
31 | from kon.model.embedding.setence_model import *
32 | from kon.model.feature_eng.feature_transform import feature_tool
33 | from kon.model.feature_eng.base_model import base_model
34 | from kon.model.ctr_model.model.models import *
--------------------------------------------------------------------------------
/paper/Next Read/A Convolutional Click Prediction Model.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/A Convolutional Click Prediction Model.pdf
--------------------------------------------------------------------------------
/paper/Next Read/[DSSM] Learning Deep Structured Semantic Models for Web Search using Clickthrough Data (UIUC 2013).pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[DSSM] Learning Deep Structured Semantic Models for Web Search using Clickthrough Data (UIUC 2013).pdf
--------------------------------------------------------------------------------
/paper/Next Read/[ESMM] Entire Space Multi-Task Model - An Effective Approach for Estimating Post-Click Conversion Rate (Alibaba 2018).pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[ESMM] Entire Space Multi-Task Model - An Effective Approach for Estimating Post-Click Conversion Rate (Alibaba 2018).pdf
--------------------------------------------------------------------------------
/paper/Next Read/[FAT-DeepFFM]Field Attentive Deep Field-aware Factorization Machine[2019].pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[FAT-DeepFFM]Field Attentive Deep Field-aware Factorization Machine[2019].pdf
--------------------------------------------------------------------------------
/paper/Next Read/[FGCNN]Feature Generation by Convolutional Neural Network forClick-Through Rate Predicti[2019].pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[FGCNN]Feature Generation by Convolutional Neural Network forClick-Through Rate Predicti[2019].pdf
--------------------------------------------------------------------------------
/paper/Next Read/[FLEN] Leveraging Field for Scalable CTR Predicti[2019].pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[FLEN] Leveraging Field for Scalable CTR Predicti[2019].pdf
--------------------------------------------------------------------------------
/paper/Next Read/[FTRL] Ad Click Prediction a View from the Trenches (Google 2013).pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[FTRL] Ad Click Prediction a View from the Trenches (Google 2013).pdf
--------------------------------------------------------------------------------
/paper/Next Read/[Fi-GNN]Modeling Feature Interactions via Graph Neural Networks for CTR Prediction[2019].pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[Fi-GNN]Modeling Feature Interactions via Graph Neural Networks for CTR Prediction[2019].pdf
--------------------------------------------------------------------------------
/paper/Next Read/[FiBiNET]Combining Feature Importance and Bilinear featureInteraction for Click-Through Rate Predict[2019].pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[FiBiNET]Combining Feature Importance and Bilinear featureInteraction for Click-Through Rate Predict[2019].pdf
--------------------------------------------------------------------------------
/paper/Next Read/[GBDT+LR] Practical Lessons from Predicting Clicks on Ads at Facebook (Facebook 2014).pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[GBDT+LR] Practical Lessons from Predicting Clicks on Ads at Facebook (Facebook 2014).pdf
--------------------------------------------------------------------------------
/paper/Next Read/[Image CTR] Image Matters - Visually modeling user behaviors using Advanced Model Server (Alibaba 2018).pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[Image CTR] Image Matters - Visually modeling user behaviors using Advanced Model Server (Alibaba 2018).pdf
--------------------------------------------------------------------------------
/paper/Next Read/[MINDN]Multi-Interest Network with Dynamic Routing for Recommendation at Tmall[2019].pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[MINDN]Multi-Interest Network with Dynamic Routing for Recommendation at Tmall[2019].pdf
--------------------------------------------------------------------------------
/paper/Next Read/[OENN]Order-aware Embedding Neural Network for CTR Predicti[2019].pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[OENN]Order-aware Embedding Neural Network for CTR Predicti[2019].pdf
--------------------------------------------------------------------------------
/paper/Next Read/[ONN]Operation-aware Neural Networks for User Response Prediction[2019].pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[ONN]Operation-aware Neural Networks for User Response Prediction[2019].pdf
--------------------------------------------------------------------------------
/paper/Next Read/[PS-PLM] Learning Piece-wise Linear Models from Large Scale Data for Ad Click Prediction (Alibaba 2017).pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[PS-PLM] Learning Piece-wise Linear Models from Large Scale Data for Ad Click Prediction (Alibaba 2017).pdf
--------------------------------------------------------------------------------
/paper/Next Read/[RLAC]Representation Learning-Assisted Click-Through Rate Prediction[2019].pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[RLAC]Representation Learning-Assisted Click-Through Rate Prediction[2019].pdf
--------------------------------------------------------------------------------
/paper/Next Read/[Warm Up Cold-start Advertisements]Improving CTR Predictions via Learning to Learn ID Embeddings[2019].pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[Warm Up Cold-start Advertisements]Improving CTR Predictions via Learning to Learn ID Embeddings[2019].pdf
--------------------------------------------------------------------------------
/paper/README:
--------------------------------------------------------------------------------
1 | paper collection
2 |
--------------------------------------------------------------------------------
/paper/behavior/[ALSH]Asymmetric LSH for Sublinear Time Maximum Inner Product Search[2014].pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/behavior/[ALSH]Asymmetric LSH for Sublinear Time Maximum Inner Product Search[2014].pdf
--------------------------------------------------------------------------------
/paper/behavior/[BST]Behavior Sequence Transformer for E-commerce Recommendation in Alibaba[2019].pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/behavior/[BST]Behavior Sequence Transformer for E-commerce Recommendation in Alibaba[2019].pdf
--------------------------------------------------------------------------------
/paper/behavior/[DIEN] Deep Interest Evolution Network for Click-Through Rate Prediction (Alibaba 2019).pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/behavior/[DIEN] Deep Interest Evolution Network for Click-Through Rate Prediction (Alibaba 2019).pdf
--------------------------------------------------------------------------------
/paper/behavior/[DIN] Deep Interest Network for Click-Through Rate Prediction (Alibaba 2018).pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/behavior/[DIN] Deep Interest Network for Click-Through Rate Prediction (Alibaba 2018).pdf
--------------------------------------------------------------------------------
/paper/behavior/[DSIN]Deep Session Interest Network for Click-Through Rate Predicti[2019].pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/behavior/[DSIN]Deep Session Interest Network for Click-Through Rate Predicti[2019].pdf
--------------------------------------------------------------------------------
/paper/behavior/[DSTN]Deep Spatio-Temporal Neural Networks for Click-Through Rate Prediction[2019].pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/behavior/[DSTN]Deep Spatio-Temporal Neural Networks for Click-Through Rate Prediction[2019].pdf
--------------------------------------------------------------------------------
/paper/behavior/[DTSF]Deep Time-Stream Framework for Click-Through Rate Prediction by Tracking Interest Evolution[2020].pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/behavior/[DTSF]Deep Time-Stream Framework for Click-Through Rate Prediction by Tracking Interest Evolution[2020].pdf
--------------------------------------------------------------------------------
/paper/behavior/[LSM]Lifelong Sequential Modeling with Personalized Memorization for User Response Prediction[2019].pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/behavior/[LSM]Lifelong Sequential Modeling with Personalized Memorization for User Response Prediction[2019].pdf
--------------------------------------------------------------------------------
/paper/behavior/[MIMN]Practice on Long Sequential User Behavior Modeling for Click-Through Rate Prediction[2019].pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/behavior/[MIMN]Practice on Long Sequential User Behavior Modeling for Click-Through Rate Prediction[2019].pdf
--------------------------------------------------------------------------------
/paper/behavior/[NTM]Neural Turing Machines[2014].pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/behavior/[NTM]Neural Turing Machines[2014].pdf
--------------------------------------------------------------------------------
/paper/behavior/[NTM]The_NTM_Introduction_And_Implementation[2017].pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/behavior/[NTM]The_NTM_Introduction_And_Implementation[2017].pdf
--------------------------------------------------------------------------------
/paper/behavior/[REFORMER] THE EFFICIENT TRANSFORMER[2020].pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/behavior/[REFORMER] THE EFFICIENT TRANSFORMER[2020].pdf
--------------------------------------------------------------------------------
/paper/behavior/[SIM]Search-based User Interest Modeling with Lifelong Sequential Behavior Data for Click-Through Rate Prediction[2020].pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/behavior/[SIM]Search-based User Interest Modeling with Lifelong Sequential Behavior Data for Click-Through Rate Prediction[2020].pdf
--------------------------------------------------------------------------------
/paper/behavior/[Self-Attention]Attention is all you need(Google 2017).pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/behavior/[Self-Attention]Attention is all you need(Google 2017).pdf
--------------------------------------------------------------------------------
/paper/behavior/[SeqFM]Sequence-Aware Factorization Machines(2019).pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/behavior/[SeqFM]Sequence-Aware Factorization Machines(2019).pdf
--------------------------------------------------------------------------------
/paper/interactive/[AFM] Attentional Factorization Machines - Learning the Weight of Feature Interactions via Attention Networks (ZJU 2017).pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/interactive/[AFM] Attentional Factorization Machines - Learning the Weight of Feature Interactions via Attention Networks (ZJU 2017).pdf
--------------------------------------------------------------------------------
/paper/interactive/[AutoInt] AutoInt Automatic Feature Interaction Learning via Self-Attentive Neural Networks(CIKM 2019).pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/interactive/[AutoInt] AutoInt Automatic Feature Interaction Learning via Self-Attentive Neural Networks(CIKM 2019).pdf
--------------------------------------------------------------------------------
/paper/interactive/[DCN] Deep & Cross Network for Ad Click Predictions (Stanford 2017).pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/interactive/[DCN] Deep & Cross Network for Ad Click Predictions (Stanford 2017).pdf
--------------------------------------------------------------------------------
/paper/interactive/[Deep Crossing] Deep Crossing - Web-Scale Modeling without Manually Crafted Combinatorial Features (Microsoft 2016).pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/interactive/[Deep Crossing] Deep Crossing - Web-Scale Modeling without Manually Crafted Combinatorial Features (Microsoft 2016).pdf
--------------------------------------------------------------------------------
/paper/interactive/[DeepFM] A Factorization-Machine based Neural Network for CTR Prediction (HIT-Huawei 2017).pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/interactive/[DeepFM] A Factorization-Machine based Neural Network for CTR Prediction (HIT-Huawei 2017).pdf
--------------------------------------------------------------------------------
/paper/interactive/[FFM] Field-aware Factorization Machines for CTR Prediction (Criteo 2016).pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/interactive/[FFM] Field-aware Factorization Machines for CTR Prediction (Criteo 2016).pdf
--------------------------------------------------------------------------------
/paper/interactive/[FM] Fast Context-aware Recommendations with Factorization Machines (UKON 2011).pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/interactive/[FM] Fast Context-aware Recommendations with Factorization Machines (UKON 2011).pdf
--------------------------------------------------------------------------------
/paper/interactive/[FNN] Deep Learning over Multi-field Categorical Data (UCL 2016).pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/interactive/[FNN] Deep Learning over Multi-field Categorical Data (UCL 2016).pdf
--------------------------------------------------------------------------------
/paper/interactive/[LR] Predicting Clicks - Estimating the Click-Through Rate for New Ads (Microsoft 2007).pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/interactive/[LR] Predicting Clicks - Estimating the Click-Through Rate for New Ads (Microsoft 2007).pdf
--------------------------------------------------------------------------------
/paper/interactive/[NFM] Neural Factorization Machines for Sparse Predictive Analytics (NUS 2017).pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/interactive/[NFM] Neural Factorization Machines for Sparse Predictive Analytics (NUS 2017).pdf
--------------------------------------------------------------------------------
/paper/interactive/[PNN] Product-based Neural Networks for User Response Prediction (SJTU 2016).pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/interactive/[PNN] Product-based Neural Networks for User Response Prediction (SJTU 2016).pdf
--------------------------------------------------------------------------------
/paper/interactive/[Wide & Deep] Wide & Deep Learning for Recommender Systems (Google 2016).pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/interactive/[Wide & Deep] Wide & Deep Learning for Recommender Systems (Google 2016).pdf
--------------------------------------------------------------------------------
/paper/interactive/[xDeepFM] xDeepFM - Combining Explicit and Implicit Feature Interactions for Recommender Systems (USTC 2018).pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/interactive/[xDeepFM] xDeepFM - Combining Explicit and Implicit Feature Interactions for Recommender Systems (USTC 2018).pdf
--------------------------------------------------------------------------------