├── .idea ├── .gitignore ├── CTR_Function.iml ├── inspectionProfiles │ └── profiles_settings.xml ├── misc.xml ├── modules.xml └── vcs.xml ├── LICENSE ├── README.md ├── data └── README.md ├── example └── ctr_example │ ├── dmin_seq.py │ ├── gsp_seq.py │ ├── seq.py │ ├── session.py │ ├── session_prepare.py │ ├── sim_seq.py │ ├── timeInterval.py │ └── un_seq.py ├── kon ├── __init__.py ├── model │ ├── __init__.py │ ├── ctr_model │ │ ├── README.md │ │ ├── __init__.py │ │ ├── layer │ │ │ ├── __init__.py │ │ │ ├── behavior_layer │ │ │ │ ├── __init__.py │ │ │ │ ├── behavior_layer.py │ │ │ │ └── rnn_demo.py │ │ │ ├── core_layer │ │ │ │ ├── __init__.py │ │ │ │ └── core_layer.py │ │ │ └── interactive_layer │ │ │ │ ├── __init__.py │ │ │ │ └── interactive_layer.py │ │ └── model │ │ │ ├── __init__.py │ │ │ └── models.py │ ├── cvr_model │ │ ├── __init__.py │ │ ├── layer │ │ │ └── __init__.py │ │ └── model │ │ │ └── __init__.py │ ├── embedding │ │ ├── .idea │ │ │ ├── embedding.iml │ │ │ ├── encodings.xml │ │ │ ├── misc.xml │ │ │ └── modules.xml │ │ ├── README.md │ │ ├── __init__.py │ │ ├── logs │ │ │ └── 0 │ │ │ │ ├── best_weights.h5 │ │ │ │ ├── events.out.tfevents.1564644409.dream-System │ │ │ │ ├── events.out.tfevents.1565180032.dream-System │ │ │ │ └── events.out.tfevents.1565180080.dream-System │ │ ├── model_test.py │ │ ├── other │ │ │ ├── __init__.py │ │ │ ├── other-collections.py │ │ │ └── other-networks.py │ │ ├── setence_model │ │ │ ├── __init__.py │ │ │ ├── backone_language_model.py │ │ │ ├── backone_optimize.py │ │ │ ├── deepwalk.py │ │ │ ├── line.py │ │ │ ├── logs │ │ │ │ └── 0 │ │ │ │ │ ├── best_weights.h5 │ │ │ │ │ ├── events.out.tfevents.1565011299.dream-System │ │ │ │ │ ├── events.out.tfevents.1565011324.dream-System │ │ │ │ │ ├── events.out.tfevents.1565011336.dream-System │ │ │ │ │ ├── events.out.tfevents.1565013918.dream-System │ │ │ │ │ ├── events.out.tfevents.1565013943.dream-System │ │ │ │ │ ├── events.out.tfevents.1565013958.dream-System │ │ │ │ │ ├── events.out.tfevents.1565013985.dream-System │ │ │ │ │ ├── events.out.tfevents.1565014029.dream-System │ │ │ │ │ ├── events.out.tfevents.1565014060.dream-System │ │ │ │ │ ├── events.out.tfevents.1565014368.dream-System │ │ │ │ │ ├── events.out.tfevents.1565014404.dream-System │ │ │ │ │ ├── events.out.tfevents.1565014481.dream-System │ │ │ │ │ ├── events.out.tfevents.1565014728.dream-System │ │ │ │ │ ├── events.out.tfevents.1565014760.dream-System │ │ │ │ │ ├── events.out.tfevents.1565014805.dream-System │ │ │ │ │ ├── events.out.tfevents.1565015151.dream-System │ │ │ │ │ ├── events.out.tfevents.1565015263.dream-System │ │ │ │ │ ├── events.out.tfevents.1565015277.dream-System │ │ │ │ │ ├── events.out.tfevents.1565015308.dream-System │ │ │ │ │ ├── events.out.tfevents.1565057550.dream-System │ │ │ │ │ ├── events.out.tfevents.1565058087.dream-System │ │ │ │ │ ├── events.out.tfevents.1565058252.dream-System │ │ │ │ │ ├── events.out.tfevents.1565058261.dream-System │ │ │ │ │ ├── events.out.tfevents.1565058653.dream-System │ │ │ │ │ ├── events.out.tfevents.1565058673.dream-System │ │ │ │ │ ├── events.out.tfevents.1565058702.dream-System │ │ │ │ │ ├── events.out.tfevents.1565059234.dream-System │ │ │ │ │ ├── events.out.tfevents.1565059587.dream-System │ │ │ │ │ ├── events.out.tfevents.1565059681.dream-System │ │ │ │ │ ├── events.out.tfevents.1565059708.dream-System │ │ │ │ │ ├── events.out.tfevents.1565059726.dream-System │ │ │ │ │ ├── events.out.tfevents.1565059768.dream-System │ │ │ │ │ ├── events.out.tfevents.1565059787.dream-System │ │ │ │ │ 
├── events.out.tfevents.1565060677.dream-System │ │ │ │ │ ├── events.out.tfevents.1565060761.dream-System │ │ │ │ │ ├── events.out.tfevents.1565060853.dream-System │ │ │ │ │ ├── events.out.tfevents.1565069889.dream-System │ │ │ │ │ ├── events.out.tfevents.1565069922.dream-System │ │ │ │ │ ├── events.out.tfevents.1565069970.dream-System │ │ │ │ │ ├── events.out.tfevents.1565070262.dream-System │ │ │ │ │ ├── events.out.tfevents.1565070318.dream-System │ │ │ │ │ ├── events.out.tfevents.1565070526.dream-System │ │ │ │ │ ├── events.out.tfevents.1565070581.dream-System │ │ │ │ │ ├── events.out.tfevents.1565070607.dream-System │ │ │ │ │ ├── events.out.tfevents.1565070688.dream-System │ │ │ │ │ ├── events.out.tfevents.1565070826.dream-System │ │ │ │ │ ├── events.out.tfevents.1565070867.dream-System │ │ │ │ │ ├── events.out.tfevents.1565070932.dream-System │ │ │ │ │ ├── events.out.tfevents.1565070966.dream-System │ │ │ │ │ ├── events.out.tfevents.1565070986.dream-System │ │ │ │ │ ├── events.out.tfevents.1565071024.dream-System │ │ │ │ │ ├── events.out.tfevents.1565162850.dream-System │ │ │ │ │ ├── events.out.tfevents.1565165341.dream-System │ │ │ │ │ ├── events.out.tfevents.1565168457.dream-System │ │ │ │ │ ├── events.out.tfevents.1565170961.dream-System │ │ │ │ │ ├── events.out.tfevents.1565173560.dream-System │ │ │ │ │ ├── events.out.tfevents.1565173578.dream-System │ │ │ │ │ ├── events.out.tfevents.1565173609.dream-System │ │ │ │ │ ├── events.out.tfevents.1565173761.dream-System │ │ │ │ │ ├── events.out.tfevents.1565174061.dream-System │ │ │ │ │ ├── events.out.tfevents.1565174117.dream-System │ │ │ │ │ ├── events.out.tfevents.1565174191.dream-System │ │ │ │ │ ├── events.out.tfevents.1565174253.dream-System │ │ │ │ │ ├── events.out.tfevents.1565174276.dream-System │ │ │ │ │ ├── events.out.tfevents.1565174293.dream-System │ │ │ │ │ ├── events.out.tfevents.1565174349.dream-System │ │ │ │ │ ├── events.out.tfevents.1565174378.dream-System │ │ │ │ │ ├── events.out.tfevents.1565179687.dream-System │ │ │ │ │ ├── events.out.tfevents.1565182503.dream-System │ │ │ │ │ ├── events.out.tfevents.1565182554.dream-System │ │ │ │ │ └── train │ │ │ │ │ ├── events.out.tfevents.1577277074.hjq-Precision-T7610.19721.672.v2 │ │ │ │ │ ├── events.out.tfevents.1577277074.hjq-Precision-T7610.profile-empty │ │ │ │ │ ├── events.out.tfevents.1577277128.hjq-Precision-T7610.20083.106.v2 │ │ │ │ │ ├── events.out.tfevents.1577277719.hjq-Precision-T7610.20872.106.v2 │ │ │ │ │ ├── events.out.tfevents.1577277787.hjq-Precision-T7610.21065.672.v2 │ │ │ │ │ ├── events.out.tfevents.1577278233.hjq-Precision-T7610.21443.672.v2 │ │ │ │ │ ├── events.out.tfevents.1577278349.hjq-Precision-T7610.21613.672.v2 │ │ │ │ │ ├── events.out.tfevents.1577278745.hjq-Precision-T7610.22262.672.v2 │ │ │ │ │ ├── events.out.tfevents.1577279268.hjq-Precision-T7610.22939.672.v2 │ │ │ │ │ ├── events.out.tfevents.1577279585.hjq-Precision-T7610.2711.672.v2 │ │ │ │ │ ├── events.out.tfevents.1577280012.hjq-Precision-T7610.3191.672.v2 │ │ │ │ │ └── plugins │ │ │ │ │ └── profile │ │ │ │ │ ├── 2019-12-25_20-31-14 │ │ │ │ │ └── local.trace │ │ │ │ │ ├── 2019-12-25_20-32-08 │ │ │ │ │ └── local.trace │ │ │ │ │ ├── 2019-12-25_20-41-59 │ │ │ │ │ └── local.trace │ │ │ │ │ ├── 2019-12-25_20-43-07 │ │ │ │ │ └── local.trace │ │ │ │ │ ├── 2019-12-25_20-50-33 │ │ │ │ │ └── local.trace │ │ │ │ │ ├── 2019-12-25_20-52-29 │ │ │ │ │ └── local.trace │ │ │ │ │ ├── 2019-12-25_20-59-05 │ │ │ │ │ └── local.trace │ │ │ │ │ ├── 2019-12-25_21-07-48 │ │ │ │ │ └── local.trace │ │ │ │ │ ├── 
2019-12-25_21-13-05 │ │ │ │ │ └── local.trace │ │ │ │ │ └── 2019-12-25_21-20-12 │ │ │ │ │ └── local.trace │ │ │ ├── node2vec.py │ │ │ ├── sdne.py │ │ │ └── walk_core_model.py │ │ ├── util │ │ │ ├── __init__.py │ │ │ ├── evaluate.py │ │ │ ├── test.txt │ │ │ └── util_tool.py │ │ └── wiki │ │ │ ├── Wiki_category.txt │ │ │ ├── Wiki_edgelist.txt │ │ │ └── Wiki_labels.txt │ └── feature_eng │ │ ├── __init__.py │ │ ├── base_model.py │ │ └── feature_transform.py ├── utils │ ├── __init__.py │ └── data_prepare.py └── wrapper │ ├── Feature_Columns.py │ └── __init__.py └── paper ├── Next Read ├── A Convolutional Click Prediction Model.pdf ├── [DSSM] Learning Deep Structured Semantic Models for Web Search using Clickthrough Data (UIUC 2013).pdf ├── [ESMM] Entire Space Multi-Task Model - An Effective Approach for Estimating Post-Click Conversion Rate (Alibaba 2018).pdf ├── [FAT-DeepFFM]Field Attentive Deep Field-aware Factorization Machine[2019].pdf ├── [FGCNN]Feature Generation by Convolutional Neural Network forClick-Through Rate Predicti[2019].pdf ├── [FLEN] Leveraging Field for Scalable CTR Predicti[2019].pdf ├── [FTRL] Ad Click Prediction a View from the Trenches (Google 2013).pdf ├── [Fi-GNN]Modeling Feature Interactions via Graph Neural Networks for CTR Prediction[2019].pdf ├── [FiBiNET]Combining Feature Importance and Bilinear featureInteraction for Click-Through Rate Predict[2019].pdf ├── [GBDT+LR] Practical Lessons from Predicting Clicks on Ads at Facebook (Facebook 2014).pdf ├── [Image CTR] Image Matters - Visually modeling user behaviors using Advanced Model Server (Alibaba 2018).pdf ├── [MINDN]Multi-Interest Network with Dynamic Routing for Recommendation at Tmall[2019].pdf ├── [OENN]Order-aware Embedding Neural Network for CTR Predicti[2019].pdf ├── [ONN]Operation-aware Neural Networks for User Response Prediction[2019].pdf ├── [PS-PLM] Learning Piece-wise Linear Models from Large Scale Data for Ad Click Prediction (Alibaba 2017).pdf ├── [RLAC]Representation Learning-Assisted Click-Through Rate Prediction[2019].pdf └── [Warm Up Cold-start Advertisements]Improving CTR Predictions via Learning to Learn ID Embeddings[2019].pdf ├── README ├── behavior ├── [ALSH]Asymmetric LSH for Sublinear Time Maximum Inner Product Search[2014].pdf ├── [BST]Behavior Sequence Transformer for E-commerce Recommendation in Alibaba[2019].pdf ├── [DIEN] Deep Interest Evolution Network for Click-Through Rate Prediction (Alibaba 2019).pdf ├── [DIN] Deep Interest Network for Click-Through Rate Prediction (Alibaba 2018).pdf ├── [DSIN]Deep Session Interest Network for Click-Through Rate Predicti[2019].pdf ├── [DSTN]Deep Spatio-Temporal Neural Networks for Click-Through Rate Prediction[2019].pdf ├── [DTSF]Deep Time-Stream Framework for Click-Through Rate Prediction by Tracking Interest Evolution[2020].pdf ├── [LSM]Lifelong Sequential Modeling with Personalized Memorization for User Response Prediction[2019].pdf ├── [MIMN]Practice on Long Sequential User Behavior Modeling for Click-Through Rate Prediction[2019].pdf ├── [NTM]Neural Turing Machines[2014].pdf ├── [NTM]The_NTM_Introduction_And_Implementation[2017].pdf ├── [REFORMER] THE EFFICIENT TRANSFORMER[2020].pdf ├── [SIM]Search-based User Interest Modeling with Lifelong Sequential Behavior Data for Click-Through Rate Prediction[2020].pdf ├── [Self-Attention]Attention is all you need(Google 2017).pdf └── [SeqFM]Sequence-Aware Factorization Machines(2019).pdf └── interactive ├── [AFM] Attentional Factorization Machines - Learning the Weight of Feature Interactions via 
Attention Networks (ZJU 2017).pdf ├── [AutoInt] AutoInt Automatic Feature Interaction Learning via Self-Attentive Neural Networks(CIKM 2019).pdf ├── [DCN] Deep & Cross Network for Ad Click Predictions (Stanford 2017).pdf ├── [Deep Crossing] Deep Crossing - Web-Scale Modeling without Manually Crafted Combinatorial Features (Microsoft 2016).pdf ├── [DeepFM] A Factorization-Machine based Neural Network for CTR Prediction (HIT-Huawei 2017).pdf ├── [FFM] Field-aware Factorization Machines for CTR Prediction (Criteo 2016).pdf ├── [FM] Fast Context-aware Recommendations with Factorization Machines (UKON 2011).pdf ├── [FNN] Deep Learning over Multi-field Categorical Data (UCL 2016).pdf ├── [LR] Predicting Clicks - Estimating the Click-Through Rate for New Ads (Microsoft 2007).pdf ├── [NFM] Neural Factorization Machines for Sparse Predictive Analytics (NUS 2017).pdf ├── [PNN] Product-based Neural Networks for User Response Prediction (SJTU 2016).pdf ├── [Wide & Deep] Wide & Deep Learning for Recommender Systems (Google 2016).pdf └── [xDeepFM] xDeepFM - Combining Explicit and Implicit Feature Interactions for Recommender Systems (USTC 2018).pdf /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /workspace.xml 3 | -------------------------------------------------------------------------------- /.idea/CTR_Function.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at
194 | 
195 | http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CTR_Function
2 | 
3 | ![image](https://img.shields.io/badge/author-TIXhjq-orange)
4 | ![image](https://img.shields.io/badge/tensorflow-v2.1-darkgreen)
5 | ![image](https://img.shields.io/badge/pandas-v0.25.0-darkgreen)
6 | ![image](https://img.shields.io/badge/pypi_package-v0.0.5-lightgrey)
7 | ![image](https://img.shields.io/badge/License-Apache_2.0-blue)
8 | Email: hjq1922451756@gmail.com
9 | 
10 | How to use (a minimal quick-start sketch follows below):
11 | pip install Data-Function
12 | from kon. ...
13 | 
14 | >1.[code folder](https://github.com/TIXhjq/CTR_Function/tree/master/code) = [nn, feature_eng]
15 | >
16 | >>1) Fresh, ready to use
17 | >>>[[building...] CTR](https://github.com/TIXhjq/CTR_Function/tree/master/kon/model/ctr_model)
18 | >
19 | >>2) Parts that still need refactoring (legacy code)
20 | >>>[Graph (being refactored)](https://github.com/TIXhjq/CTR_Function/tree/master/kon/model/embedding)
21 | >>>[[building...] Feature Engineering (being refactored in Scala)](https://github.com/TIXhjq/CTR_Function/tree/master/kon/model/feature_eng)
22 | >
23 | >2.[paper](https://github.com/TIXhjq/CTR_Function/tree/master/paper): nn (only the CTR part for now, the rest will come later ^_^)
24 | >3.[use example](https://github.com/TIXhjq/CTR_Function/tree/master/example): ctr_example (plenty of examples), else (maybe...)
25 | >4.[data](https://github.com/TIXhjq/CTR_Function/tree/master/data)
26 | 
27 | p.s Each directory level has its own more detailed README...
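A minimal quick-start sketch, pieced together from the scripts in [example/ctr_example](https://github.com/TIXhjq/CTR_Function/tree/master/example) (the imports and `data_prepare` call below are taken from those examples; the exact entry points of the `Data-Function` PyPI release may differ):

```python
# Hypothetical quick-start assembled from example/ctr_example/*.py
#   pip install Data-Function
from kon.model.ctr_model.model.models import *     # model zoo: FM, DIEN, DSIN, BST, SIM, ...
from kon.utils.data_prepare import data_prepare    # feature / sequence preprocessing helpers

# dataset.tar.gz (see data/README.md) is assumed to be downloaded into ./data/
data_pre = data_prepare(batch_size=32)
```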
28 | 
29 | [Short write-ups of a few of the NN models (only a handful so far)](https://zhuanlan.zhihu.com/c_1145034612807028736)
30 | 
--------------------------------------------------------------------------------
/data/README.md:
--------------------------------------------------------------------------------
1 | data url:
2 | [DataSet](https://www.dropbox.com/s/jjyygph9wm36fmr/dataset.tar.gz?dl=0)
3 | download to ./ (i.e., this data/ directory)
--------------------------------------------------------------------------------
/example/ctr_example/dmin_seq.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | # _*_ coding:utf-8 _*_
4 | '''=================================
5 | @Author :tix_hjq
6 | @Date :2020/11/23 7:15 PM
7 | @File :dmin_seq.py
8 | @email :hjq1922451756@gmail.com or 1922451756@qq.com
9 | ================================='''
10 | from kon.model.ctr_model.model.models import *
11 | 
12 | warnings.filterwarnings("ignore")
13 | pd.set_option('display.max_columns', None)
14 | pd.set_option('display.max_rows', None)
15 | pd.set_option('max_colwidth', 100)
16 | 
17 | print(os.getcwd())
18 | # ----------------------------------------------------
19 | data_folder = '../../data/'
20 | origin_data_folder = data_folder + 'origin_data/'
21 | submit_data_folder = data_folder + 'submit_data/'
22 | eda_data_folder = data_folder + 'eda_data/'
23 | fea_data_folder = data_folder + 'fea_data/'
24 | # -----------------------------------------------------------------
25 | model_tool = base_model(submit_data_folder)
26 | fea_tool = feature_tool(fea_data_folder)
27 | data_pre = data_prepare(batch_size=32)
28 | # -----------------------------------------------------------------
29 | columns = ["date", "user_id", "price", "ad_id", "cate_id", "target", "day"]
30 | 
31 | trainDf = pd.read_csv(origin_data_folder + 'ali_data/train.csv', usecols=columns, nrows=100)
32 | testDf = pd.read_csv(origin_data_folder + 'ali_data/test.csv', usecols=columns, nrows=100)
33 | 
34 | df = pd.concat([trainDf, testDf], axis=0)
35 | df["date"] = pd.to_datetime(df.date)
36 | df.sort_values(["date"], inplace=True)
37 | print(df.head())
38 | 
--------------------------------------------------------------------------------
/example/ctr_example/gsp_seq.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # _*_ coding:utf-8 _*_
3 | '''=================================
4 | @Author :tix_hjq
5 | @Date :2020/6/27 3:29 PM
6 | @File :gsp_seq.py
7 | @email :hjq1922451756@gmail.com or 1922451756@qq.com
8 | ================================='''
9 | from kon.model.ctr_model.model.models import *
10 | warnings.filterwarnings("ignore")
11 | pd.set_option('display.max_columns', None)
12 | pd.set_option('display.max_rows', None)
13 | pd.set_option('max_colwidth', 100)
14 | 
15 | print(os.getcwd())
16 | #----------------------------------------------------
17 | data_folder = '../../data/'
18 | origin_data_folder = data_folder + 'origin_data/'
19 | submit_data_folder = data_folder + 'submit_data/'
20 | eda_data_folder = data_folder + 'eda_data/'
21 | fea_data_folder = data_folder + 'fea_data/'
22 | #-----------------------------------------------------------------
23 | model_tool = base_model(submit_data_folder)
24 | fea_tool = feature_tool(fea_data_folder)
25 | data_pre=data_prepare()
26 | #-----------------------------------------------------------------
27 | trainDf=pd.read_csv(origin_data_folder+'gsp_train.csv')
28 | testDf=pd.read_csv(origin_data_folder+'gsp_test.csv')
29 | 
30 | 
sparse_fea=['userid','target_item','pos_ts'] 31 | seq_fea=['item_seq','gsp_seq'] 32 | target_fea=['target'] 33 | 34 | df,(train_idx,test_idx)=data_pre.concat_test_train(trainDf,testDf) 35 | seqDf=df[seq_fea] 36 | sparseDf=df[sparse_fea] 37 | targetDf=df[target_fea] 38 | 39 | print(targetDf['target'].value_counts()) 40 | 41 | seqDf,seqIdx,seqInfo=data_pre.seq_deal( 42 | seqDf,max_len=[90]*2,embedding_dim=[8]*2,mask_zero=True,is_trainable=True,is_str_list=False,is_str=True, 43 | pre_weight=None,sample_num=5) 44 | sparseDf,sparseInfo=data_pre.sparse_fea_deal(sparseDf) 45 | 46 | train,val=data_pre.extract_train_test( 47 | targetDf=targetDf,test_idx=test_idx,train_idx=train_idx,sparseDf=sparseDf,seqDf=seqDf) 48 | 49 | behaviorFea=['item_seq','gsp_seq'] 50 | 51 | model=BST(data_pre.FeatureInput(sparseInfo=sparseInfo,seqInfo=seqInfo),behaviorFea=behaviorFea) 52 | print(model.summary()) 53 | 54 | model.compile(loss="mean_squared_error",optimizer='adam',metrics=['accuracy']) 55 | model.fit(train,validation_data=val,epochs=100,callbacks=[tf.keras.callbacks.EarlyStopping(patience=10,verbose=5)]) -------------------------------------------------------------------------------- /example/ctr_example/seq.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | '''================================= 3 | @Author :tix_hjq 4 | @Date :2020/5/22 下午4:40 5 | @File :seq.py 6 | =================================''' 7 | from kon.model.ctr_model.model.models import * 8 | 9 | warnings.filterwarnings("ignore") 10 | pd.set_option('display.max_columns', None) 11 | pd.set_option('display.max_rows', None) 12 | pd.set_option('max_colwidth', 100) 13 | 14 | print(os.getcwd()) 15 | #---------------------------------------------------- 16 | data_folder = '../../data/' 17 | origin_data_folder = data_folder + 'origin_data/' 18 | submit_data_folder = data_folder + 'submit_data/' 19 | eda_data_folder = data_folder + 'eda_data/' 20 | fea_data_folder = data_folder + 'fea_data/' 21 | #----------------------------------------------------------------- 22 | model_tool = base_model(submit_data_folder) 23 | fea_tool = feature_tool(fea_data_folder) 24 | data_pre=data_prepare(batch_size=32) 25 | #----------------------------------------------------------------- 26 | trainDf=pd.read_csv(origin_data_folder+'seq_train.csv') 27 | testDf=pd.read_csv(origin_data_folder+'seq_test.csv') 28 | 29 | sparse_fea=['user_id','item_id','item_cate'] 30 | seq_fea=['buy_list','cate_list'] 31 | # seq_fea=['buy_list'] 32 | target_fea=['target'] 33 | 34 | df,(train_idx,test_idx)=data_pre.concat_test_train(trainDf,testDf) 35 | seqDf=df[seq_fea] 36 | sparseDf=df[sparse_fea] 37 | targetDf=df[target_fea] 38 | 39 | seqDf,seqIdx,seqInfo=data_pre.seq_deal( 40 | seqDf,max_len=[90]*2,embedding_dim=[8]*2,mask_zero=True,is_trainable=True, 41 | pre_weight=None,sample_num=5) 42 | sparseDf,sparseInfo=data_pre.sparse_fea_deal(sparseDf) 43 | 44 | train,val=data_pre.extract_train_test( 45 | targetDf=targetDf,test_idx=test_idx,train_idx=train_idx,sparseDf=sparseDf,seqDf=seqDf) 46 | 47 | candidateFea=['item_id','item_cate'] 48 | behaviorFea=['buy_list','cate_list'] 49 | 50 | model=DIEN(data_pre.FeatureInput(sparseInfo=sparseInfo,seqInfo=seqInfo),candidateFea=candidateFea,behaviorFea=behaviorFea) 51 | print(model.summary()) 52 | model.compile(loss=tf.losses.binary_crossentropy,optimizer='adam',metrics=[tf.keras.metrics.AUC()]) 53 | 
model.fit(train,validation_data=val,epochs=100,callbacks=[tf.keras.callbacks.EarlyStopping(patience=10,verbose=5)]) -------------------------------------------------------------------------------- /example/ctr_example/session.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # _*_ coding:utf-8 _*_ 3 | '''================================= 4 | @Author :tix_hjq 5 | @Date :2020/6/9 上午11:06 6 | @File :session.py 7 | =================================''' 8 | from numpy.random import random 9 | from kon.model.ctr_model.model.models import * 10 | from kon.utils.data_prepare import data_prepare 11 | 12 | warnings.filterwarnings("ignore") 13 | pd.set_option('display.max_columns', None) 14 | pd.set_option('display.max_rows', None) 15 | pd.set_option('max_colwidth', 100) 16 | 17 | print(os.getcwd()) 18 | #---------------------------------------------------- 19 | data_folder = '../../data/' 20 | origin_data_folder = data_folder + 'origin_data/' 21 | submit_data_folder = data_folder + 'submit_data/' 22 | eda_data_folder = data_folder + 'eda_data/' 23 | fea_data_folder = data_folder + 'fea_data/' 24 | #----------------------------------------------------------------- 25 | model_tool = base_model(submit_data_folder) 26 | fea_tool = feature_tool(fea_data_folder) 27 | data_pre=data_prepare() 28 | #----------------------------------------------------------------- 29 | np.random.seed(2020) 30 | tf.random.set_seed(2020) 31 | 32 | trainDf=pd.read_csv(origin_data_folder+'session_train.csv') 33 | testDf=pd.read_csv(origin_data_folder+'session_test.csv') 34 | 35 | session_maxLen=10 36 | session_maxNum=20 37 | sparse_fea=['region','prev','vid','cid','class_id'] 38 | dense_fea=['title_length'] 39 | seq_fea=['click_item_session'] 40 | target_fea=['label'] 41 | 42 | df,(train_idx,test_idx)=data_pre.concat_test_train(trainDf,testDf) 43 | seqDf=df[seq_fea] 44 | sparseDf=df[sparse_fea] 45 | denseDf=df[dense_fea] 46 | targetDf=df[target_fea] 47 | 48 | seqDf,seqInfo=data_pre.sparse_wrap(seqDf,seqIdx_path=origin_data_folder+'session_seq_idx.pkl',max_len=[session_maxLen*session_maxNum]*1,embedding_dim=[8]*1) 49 | sparseDf,sparseInfo=data_pre.sparse_fea_deal(sparseDf) 50 | denseDf,denseInfo=data_pre.dense_fea_deal(denseDf) 51 | 52 | train,val=data_pre.extract_train_test( 53 | targetDf=targetDf,test_idx=test_idx,train_idx=train_idx,sparseDf=sparseDf,seqDf=seqDf,denseDf=denseDf) 54 | 55 | candidateFea=['vid'] 56 | behaviorFea=['click_item_session'] 57 | 58 | model=DSIN(data_pre.FeatureInput(sparseInfo=sparseInfo,seqInfo=seqInfo),candidateFea=candidateFea,behaviorFea=behaviorFea) 59 | print(model.summary()) 60 | model.compile(loss=tf.losses.binary_crossentropy,optimizer='adam',metrics=[tf.keras.metrics.AUC()]) 61 | model.fit(train,validation_data=val,epochs=100,callbacks=[tf.keras.callbacks.EarlyStopping(patience=10,verbose=5)]) -------------------------------------------------------------------------------- /example/ctr_example/session_prepare.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # _*_ coding:utf-8 _*_ 3 | '''================================= 4 | @Author :tix_hjq 5 | @Date :2020/5/17 下午4:27 6 | @File :session_prepare.py 7 | =================================''' 8 | from pandas import DataFrame 9 | import gc 10 | from scipy import stats 11 | from kon.model.ctr_model.model.models import * 12 | from kon.utils.data_prepare import data_prepare 13 | 14 | warnings.filterwarnings("ignore") 15 | 
pd.set_option('display.max_columns', None) 16 | pd.set_option('display.max_rows', None) 17 | pd.set_option('max_colwidth', 100) 18 | 19 | print(os.getcwd()) 20 | #---------------------------------------------------- 21 | data_folder = '../../data/' 22 | origin_data_folder = data_folder + 'origin_data/mgtv_data/' 23 | submit_data_folder = data_folder + 'submit_data/' 24 | eda_data_folder = data_folder + 'eda_data/' 25 | fea_data_folder = data_folder + 'fea_data/' 26 | #----------------------------------------------------------------- 27 | model_tool = base_model(submit_data_folder) 28 | fea_tool = feature_tool(fea_data_folder) 29 | data_format=data_prepare() 30 | #----------------------------------------------------------------- 31 | def pareper(): 32 | context=pd.read_parquet(origin_data_folder+'context1.parquet') 33 | item=pd.read_parquet(origin_data_folder+'item.parquet') 34 | user=pd.read_csv(origin_data_folder+'user.parquet') 35 | 36 | user=user.merge(context,how='left',on=['did']) 37 | user=user.merge(item,how='left',on=['vid']) 38 | 39 | logs_fea=['click_item','click_time'] 40 | user_fea=['did','region','prev'] 41 | ad_fea=['vid','cid','class_id','title_length'] 42 | target_fea=['label'] 43 | 44 | use_fea=logs_fea+user_fea+ad_fea+target_fea 45 | 46 | user=user[use_fea] 47 | user.drop_duplicates(['did'],inplace=True) 48 | user.to_csv(origin_data_folder+'data.csv',index=None) 49 | 50 | df=pd.read_csv(origin_data_folder+'part_29/data.csv') 51 | df=pd.concat([df,pd.read_csv(origin_data_folder+'part_30/data.csv')],axis=0) 52 | df.to_csv(origin_data_folder+'data.csv',index=None) 53 | 54 | 55 | def generator_session_idx(df, group_cols: list = ['did', 'click_time'], item_cols: str = 'click_item'): 56 | ''' 57 | :param df: 58 | format: 59 | user_id time item 60 | 1 1 1 61 | :param group_cols: 62 | format: list ==> [user,time] 63 | [groupby sign index:user_id,groupby time index:session split time] 64 | :param item_cols: 65 | item cols 66 | :return: 67 | ''' 68 | 69 | def session_list(x): 70 | return len(x.tolist()) 71 | 72 | df = df.groupby(group_cols)[item_cols].agg(session_list).reset_index().rename( 73 | columns={item_cols: '{}_session_idx'.format(item_cols)}) 74 | 75 | def seq_idx(x): 76 | s_ = 0 77 | need_list = ['0'] 78 | for i in x.tolist(): 79 | s_ += i 80 | need_list.append(str(s_)) 81 | return ','.join(need_list) 82 | 83 | df = df.groupby([group_cols[0]])['{}_session_idx'.format(item_cols)].agg(seq_idx).reset_index() 84 | 85 | return df 86 | 87 | save_folder=data_folder + 'origin_data/' 88 | def perpare(): 89 | ori_df=pd.read_csv(origin_data_folder+'data.csv') 90 | ori_df['seq_len']=[len(str(i).split(',')) for i in ori_df['click_item'].tolist()] 91 | seqDf,seq_idx,seqInfo=data_format.seq_deal(seqDf=ori_df[['click_item']],embedding_dim=[8],is_str=True,is_str_list=False,use_wrap=False) 92 | ori_df['click_item']=[','.join([str(j) for j in i]) for i in seqDf['click_item']] 93 | fea_tool.pickle_op(path=save_folder+'session_seq_idx.pkl',is_save=True,file=seq_idx) 94 | 95 | return ori_df 96 | 97 | import time 98 | def get_time(timeStamp): 99 | timeArray = time.localtime(int(timeStamp)) 100 | return time.strftime("%Y-%m-%d %H:%M:%S", timeArray) 101 | 102 | def gen_session_seq(session_maxLen,session_maxNum): 103 | ori_df=perpare() 104 | df=ori_df 105 | df.dropna(inplace=True) 106 | df['click_time']=[','.join([get_time(j) for j in i.split(',')]) for i in df['click_time'].tolist()] 107 | # 1h as split session 108 | time_list=[i.split(',')for i in df['click_time'].tolist()] 109 | 
item_list=[i.split(',')for i in df['click_item'].tolist()] 110 | did_list=[[i]*len(l) for i,l in zip(df['did'].tolist(),item_list)] 111 | 112 | df=DataFrame() 113 | t_list = [] 114 | i_list = [] 115 | d_list = [] 116 | for t_,i_,d_ in zip(time_list,item_list,did_list): 117 | t_list+=t_ 118 | i_list+=i_ 119 | d_list+=d_ 120 | df['click_time']=t_list 121 | df['click_item']=i_list 122 | df['did']=d_list 123 | 124 | df['click_time']=pd.to_datetime(df['click_time']) 125 | df['click_time']=df['click_time'].dt.day*100+df['click_time'].dt.hour 126 | df['click_item']=df['click_item'].astype('str') 127 | 128 | df=data_format.generator_session(df,group_cols=['did','click_time'],item_cols='click_item',session_maxLen=session_maxLen) 129 | df=data_format.generator_seq(df,group_cols=['did','click_time'],item_cols='click_item',session_maxNum=session_maxNum,session_maxLen=session_maxLen) 130 | 131 | del ori_df['click_time'] 132 | ori_df=ori_df.merge(df,how='left',on=['did']) 133 | ori_df.to_csv('../../data/origin_data/data.csv',index=None) 134 | 135 | def get_session_seq(df,item_col,max_session_length=10): 136 | session_seq=[ 137 | [item_.split(',')[int(s_):int(e_)] 138 | for s_,e_ in zip(idx_.split(',')[:-1],idx_.split(',')[1:])] 139 | for item_,idx_ in zip(df[item_col].tolist(),df['{}_session_idx'.format(item_col)].tolist())] 140 | return [[tf.keras.preprocessing.sequence.pad_sequences(seq,maxlen=max_session_length) for seq in i]for i in session_seq] 141 | 142 | 143 | def check_length(): 144 | df=pd.read_csv('../../data/origin_data/data.csv') 145 | df['seq_len']=[len(i.split(' ')) for i in df['click_item'].tolist()] 146 | df['session_len_mod']=[stats.mode([len(j.split(',')) for j in i.split(' ')]) for i in df['click_item'].tolist()] 147 | df['session_len_mean']=[np.mean([len(j.split(',')) for j in i.split(' ')]) for i in df['click_item'].tolist()] 148 | df['session_len_mediandf ']=[np.median([len(j.split(',')) for j in i.split(' ')]) for i in df['click_item'].tolist()] 149 | 150 | print(df.session_len_mod.value_counts()) 151 | print(df.session_len_mean.value_counts()) 152 | print(df.session_len_median.value_counts()) 153 | 154 | session_maxLen=10 155 | session_maxNum=20 156 | gen_session_seq(session_maxLen,session_maxNum) 157 | 158 | df=pd.read_csv('../../data/origin_data/data.csv') 159 | del df['seq_len'],df['did'],df['click_item'] 160 | gc.collect() 161 | 162 | train_df=df.loc[:df.shape[0]*0.8] 163 | test_df=df.loc[df.shape[0]*0.8:] 164 | 165 | train_df.to_csv(save_folder+'session_train.csv',index=None) 166 | test_df.to_csv(save_folder+'session_test.csv',index=None) 167 | 168 | -------------------------------------------------------------------------------- /example/ctr_example/sim_seq.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | '''================================= 3 | @Author :tix_hjq 4 | @Date :2020/11/22 上午10:18 5 | @File :sim_seq.py 6 | @email :hjq1922451756@gmail.com or 1922451756@qq.com 7 | =================================''' 8 | from kon.model.ctr_model.model.models import * 9 | 10 | warnings.filterwarnings("ignore") 11 | pd.set_option('display.max_columns', None) 12 | pd.set_option('display.max_rows', None) 13 | pd.set_option('max_colwidth', 100) 14 | 15 | print(os.getcwd()) 16 | #---------------------------------------------------- 17 | data_folder = '../../data/' 18 | origin_data_folder = data_folder + 'origin_data/' 19 | submit_data_folder = data_folder + 'submit_data/' 20 | eda_data_folder = data_folder + 'eda_data/' 
21 | fea_data_folder = data_folder + 'fea_data/' 22 | #----------------------------------------------------------------- 23 | model_tool = base_model(submit_data_folder) 24 | fea_tool = feature_tool(fea_data_folder) 25 | data_pre=data_prepare(batch_size=32) 26 | #----------------------------------------------------------------- 27 | 28 | trainDf=pd.read_csv(origin_data_folder+'seq_train.csv') 29 | testDf=pd.read_csv(origin_data_folder+'seq_test.csv') 30 | 31 | df,(train_idx,test_idx)=data_pre.concat_test_train(trainDf,testDf) 32 | 33 | reduceSeq,reduceCate=data_pre.hard_search(seqData=fea_tool.batch_convert_list(df["cate_list"]), 34 | seqCate=fea_tool.batch_convert_list(df["cate_list"]), 35 | targetCate=df["item_cate"].tolist()) 36 | df["reduce_seq"]=reduceSeq 37 | df["reduce_seq"]=df["reduce_seq"].astype("str") 38 | df["reduce_cate"]=reduceCate 39 | df["reduce_cate"]=df["reduce_cate"].astype("str") 40 | 41 | sparse_fea=['user_id','item_id','item_cate'] 42 | reduce_fea=['reduce_seq','reduce_cate'] 43 | seq_fea=["buy_list","cate_list"]+reduce_fea 44 | target_fea=['target'] 45 | 46 | seqDf=df[seq_fea] 47 | sparseDf=df[sparse_fea] 48 | targetDf=df[target_fea] 49 | 50 | seqDf,seqIdx,seqInfo=data_pre.seq_deal( 51 | seqDf,max_len=[90]*4,embedding_dim=[8]*4,mask_zero=True,is_trainable=True, 52 | pre_weight=None,sample_num=5,use_wrap=True) 53 | 54 | sparseDf,sparseInfo=data_pre.sparse_fea_deal(sparseDf) 55 | 56 | train,val=data_pre.extract_train_test( 57 | targetDf=targetDf,test_idx=test_idx,train_idx=train_idx,sparseDf=sparseDf,seqDf=seqDf) 58 | 59 | model=SIM(data_pre.FeatureInput(sparseInfo=sparseInfo,seqInfo=seqInfo),reduceFea=reduce_fea,candidateFea=["item_id","item_cate"],behaviorFea=seq_fea) 60 | print(model.summary()) 61 | model.compile(loss=tf.losses.binary_crossentropy,optimizer='adam',metrics=[tf.keras.metrics.AUC()]) 62 | model.fit(train,validation_data=val,epochs=100,callbacks=[tf.keras.callbacks.EarlyStopping(patience=10,verbose=5)]) -------------------------------------------------------------------------------- /example/ctr_example/timeInterval.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | '''================================= 3 | @Author :tix_hjq 4 | @Date :2020/6/24 下午1:13 5 | @File :timeInterval.py 6 | @email :hjq1922451756@gmail.com or 1922451756@qq.com 7 | =================================''' 8 | from kon.model.ctr_model.model.models import * 9 | 10 | warnings.filterwarnings("ignore") 11 | pd.set_option('display.max_columns', None) 12 | pd.set_option('display.max_rows', None) 13 | pd.set_option('max_colwidth', 100) 14 | 15 | print(os.getcwd()) 16 | #---------------------------------------------------- 17 | data_folder = '../../data/' 18 | origin_data_folder = data_folder + 'origin_data/' 19 | submit_data_folder = data_folder + 'submit_data/' 20 | eda_data_folder = data_folder + 'eda_data/' 21 | fea_data_folder = data_folder + 'fea_data/' 22 | #----------------------------------------------------------------- 23 | model_tool = base_model(submit_data_folder) 24 | fea_tool = feature_tool(fea_data_folder) 25 | data_pre=data_prepare() 26 | #----------------------------------------------------------------- 27 | trainDf=pd.read_csv(origin_data_folder+'time_inter_train.csv') 28 | testDf=pd.read_csv(origin_data_folder+'time_inter_test.csv') 29 | 30 | sparse_fea=['did','region','vid','cid'] 31 | seq_fea=['click_item','click_interval'] 32 | target_fea=['label'] 33 | 34 | 
df,(train_idx,test_idx)=data_pre.concat_test_train(trainDf,testDf) 35 | seqDf=df[seq_fea] 36 | sparseDf=df[sparse_fea] 37 | targetDf=df[target_fea] 38 | 39 | seqDf,seqIdx,seqInfo=data_pre.seq_deal( 40 | seqDf=seqDf,embedding_dim=[8,0],max_len=[90]*2,is_str_list=False, 41 | is_str=True,sample_num=5) 42 | sparseDf,sparseInfo=data_pre.sparse_fea_deal(sparseDf) 43 | 44 | train,val=data_pre.extract_train_test( 45 | targetDf=targetDf,test_idx=test_idx,train_idx=train_idx,sparseDf=sparseDf,seqDf=seqDf) 46 | 47 | 48 | userFea=['region'] 49 | timestampFea=['click_interval'] 50 | behaviorFea=['click_item'] 51 | targetFea=['vid'] 52 | 53 | model=DTS(data_pre.FeatureInput(sparseInfo=sparseInfo,seqInfo=seqInfo),userFea=userFea, 54 | timestampFea=timestampFea,behaviorFea=behaviorFea,targetFea=targetFea) 55 | print(model.summary()) 56 | model.compile(loss=tf.losses.binary_crossentropy,optimizer='adam',metrics=[tf.keras.metrics.AUC()]) 57 | model.fit(train,validation_data=val,epochs=100,callbacks=[tf.keras.callbacks.EarlyStopping(patience=10,verbose=5)]) -------------------------------------------------------------------------------- /example/ctr_example/un_seq.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # _*_ coding:utf-8 _*_ 3 | '''================================= 4 | @Author :tix_hjq 5 | @Date :2020/5/3 下午4:59 6 | @File :un_seq.py 7 | =================================''' 8 | from numpy.random import random 9 | import tensorflow as tf 10 | import pandas as pd 11 | import numpy as np 12 | import warnings 13 | import os 14 | from kon.model.ctr_model.model.models import * 15 | 16 | warnings.filterwarnings("ignore") 17 | pd.set_option('display.max_columns', None) 18 | pd.set_option('display.max_rows', None) 19 | pd.set_option('max_colwidth', 100) 20 | 21 | print(os.getcwd()) 22 | #---------------------------------------------------- 23 | data_folder = '../../data/' 24 | origin_data_folder = data_folder + 'origin_data/' 25 | submit_data_folder = data_folder + 'submit_data/' 26 | eda_data_folder = data_folder + 'eda_data/' 27 | fea_data_folder = data_folder + 'fea_data/' 28 | #----------------------------------------------------------------- 29 | model_tool = base_model(submit_data_folder) 30 | fea_tool = feature_tool(fea_data_folder) 31 | prepare_tool=data_prepare() 32 | #----------------------------------------------------------------- 33 | np.random.seed(2020) 34 | tf.random.set_seed(2020) 35 | 36 | train_df=pd.read_csv(origin_data_folder+'unseq_train.csv',nrows=100).rename(columns={'target':'label'}) 37 | test_df=pd.read_csv(origin_data_folder+'unseq_test.csv',nrows=100).rename(columns={'target':'label'}) 38 | 39 | sparse_fea=[str(i) for i in range(14,40)] 40 | dense_fea=[str(i) for i in range(1,14)] 41 | target_fea=['label'] 42 | 43 | val_index=np.random.choice(train_df.index.tolist(),size=int(train_df.shape[0]*0.3)) 44 | train_index=[i for i in train_df.index.tolist()if i not in val_index] 45 | 46 | df,(train_idx,test_idx)=prepare_tool.concat_test_train(train_df,test_df) 47 | sparseDf=df[sparse_fea] 48 | denseDf=df[dense_fea] 49 | targetDf=df[target_fea] 50 | 51 | sparseDf,sparseInfo=prepare_tool.sparse_fea_deal(sparseDf) 52 | denseDf,denseInfo=prepare_tool.dense_fea_deal(denseDf) 53 | 54 | train_df,test_df,y_train,y_test=prepare_tool.extract_train_test(train_idx=train_idx,test_idx=test_idx,sparseDf=sparseDf,denseDf=denseDf,targetDf=targetDf,use_softmax=True) 55 | # 
train_df,test_df,y_train,y_test=prepare_tool.extract_train_test(train_idx=train_idx,test_idx=test_idx,sparseDf=sparseDf,targetDf=targetDf) 56 | train,val=prepare_tool.split_val_set(train_df,y_train,train_index,val_index) 57 | #----------------------------train model-------------------------------------- 58 | 59 | model=FM(prepare_tool.FeatureInput(sparseInfo=sparseInfo,denseInfo=denseInfo,useAddLinear=False,useLinear=True,useFlattenLinear=False)) 60 | print(model.summary()) 61 | model.compile(loss=tf.losses.binary_crossentropy,optimizer='adam',metrics=[tf.keras.metrics.AUC()]) 62 | model.fit(train,validation_data=val,epochs=100,callbacks=[tf.keras.callbacks.EarlyStopping(patience=10,verbose=5)]) -------------------------------------------------------------------------------- /kon/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # _*_ coding:utf-8 _*_ 3 | '''================================= 4 | @Author :tix_hjq 5 | @Date :2020/7/21 上午9:00 6 | @File :__init__.py.py 7 | @email :hjq1922451756@gmail.com or 1922451756@qq.com 8 | =================================''' -------------------------------------------------------------------------------- /kon/model/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # _*_ coding:utf-8 _*_ 3 | '''================================= 4 | @Author :tix_hjq 5 | @Date :2020/5/29 下午4:01 6 | @File :__init__.py.py 7 | =================================''' 8 | -------------------------------------------------------------------------------- /kon/model/ctr_model/README.md: -------------------------------------------------------------------------------- 1 | ![image](https://img.shields.io/badge/achieve_build-17-orange) 2 | 3 | 4 | CTR MODEL ACHIEVE: 5 | >[1]Interactive Model 6 | >>1.[[FM] Fast Context-aware Recommendations with Factorization Machines (UKON 2011).pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/interactive/%5BFM%5D%20Fast%20Context-aware%20Recommendations%20with%20Factorization%20Machines%20(UKON%202011).pdf) 7 | >>2.[[PNN] Product-based Neural Networks for User Response Prediction (SJTU 2016).pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/interactive/%5BPNN%5D%20Product-based%20Neural%20Networks%20for%20User%20Response%20Prediction%20(SJTU%202016).pdf) 8 | >>3.[[Deep Crossing] Deep Crossing - Web-Scale Modeling without Manually Crafted Combinatorial Features (Microsoft 2016).pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/interactive/%5BDeep%20Crossing%5D%20Deep%20Crossing%20-%20Web-Scale%20Modeling%20without%20Manually%20Crafted%20Combinatorial%20Features%20(Microsoft%202016).pdf) 9 | >>4.[[Wide & Deep] Wide & Deep Learning for Recommender Systems (Google 2016).pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/interactive/%5BWide%20%26%20Deep%5D%20Wide%20%26%20Deep%20Learning%20for%20Recommender%20Systems%20(Google%202016).pdf) 10 | >>5.[[DeepFM] A Factorization-Machine based Neural Network for CTR Prediction (HIT-Huawei 2017).pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/interactive/%5BDeepFM%5D%20A%20Factorization-Machine%20based%20Neural%20Network%20for%20CTR%20Prediction%20(HIT-Huawei%202017).pdf) 11 | >>6.[[DCN] Deep & Cross Network for Ad Click Predictions (Stanford 
2017).pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/interactive/%5BDCN%5D%20Deep%20%26%20Cross%20Network%20for%20Ad%20Click%20Predictions%20(Stanford%202017).pdf) 12 | >>7.[[NFM] Neural Factorization Machines for Sparse Predictive Analytics (NUS 2017).pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/interactive/%5BNFM%5D%20Neural%20Factorization%20Machines%20for%20Sparse%20Predictive%20Analytics%20(NUS%202017).pdf) 13 | >>8.[[xDeepFM] xDeepFM - Combining Explicit and Implicit Feature Interactions for Recommender Systems (USTC 2018).pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/interactive/%5BxDeepFM%5D%20xDeepFM%20-%20Combining%20Explicit%20and%20Implicit%20Feature%20Interactions%20for%20Recommender%20Systems%20(USTC%202018).pdf) 14 | >>9.[[AFM] Attentional Factorization Machines - Learning the Weight of Feature Interactions via Attention Networks (ZJU 2017).pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/interactive/%5BAFM%5D%20Attentional%20Factorization%20Machines%20-%20Learning%20the%20Weight%20of%20Feature%20Interactions%20via%20Attention%20Networks%20(ZJU%202017).pdf) 15 | >>10.[[AutoInt] AutoInt Automatic Feature Interaction Learning via Self-Attentive Neural Networks(CIKM 2019).pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/interactive/%5BAutoInt%5D%20AutoInt%20Automatic%20Feature%20Interaction%20Learning%20via%20Self-Attentive%20Neural%20Networks(CIKM%202019).pdf) 16 | >>...Later Building... 17 | 18 | >[2]Behavior Model 19 | >>1.[[DIN] Deep Interest Network for Click-Through Rate Prediction (Alibaba 2018).pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/behavior/%5BDIN%5D%20Deep%20Interest%20Network%20for%20Click-Through%20Rate%20Prediction%20(Alibaba%202018).pdf) 20 | >>2.[[DIEN] Deep Interest Evolution Network for Click-Through Rate Prediction (Alibaba 2019).pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/behavior/%5BDIEN%5D%20Deep%20Interest%20Evolution%20Network%20for%20Click-Through%20Rate%20Prediction%20(Alibaba%202019).pdf) 21 | >>3.[[DSIN]Deep Session Interest Network for Click-Through Rate Predicti(Alibaba 2019).pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/behavior/%5BDSIN%5DDeep%20Session%20Interest%20Network%20for%20Click-Through%20Rate%20Predicti%5B2019%5D.pdf) 22 | >>4.[[SeqFM]Sequence-Aware Factorization Machines(2019).pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/behavior/%5BSeqFM%5DSequence-Aware%20Factorization%20Machines(2019).pdf) 23 | >>5.[[DTS]Deep Time-Stream Framework for Click-Through Rate Prediction by Tracking Interest Evolution[2020].pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/behavior/%5BDTSF%5DDeep%20Time-Stream%20Framework%20for%20Click-Through%20Rate%20Prediction%20by%20Tracking%20Interest%20Evolution%5B2020%5D.pdf) 24 | >>6.[[BST]Behavior Sequence Transformer for E-commerce Recommendation in Alibaba[2019].pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/behavior/%5BBST%5DBehavior%20Sequence%20Transformer%20for%20E-commerce%20Recommendation%20in%20Alibaba%5B2019%5D.pdf) 25 | >>7.[[MIMN]Practice on Long Sequential User Behavior Modeling for Click-Through Rate Prediction[2019].pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/behavior/%5BMIMN%5DPractice%20on%20Long%20Sequential%20User%20Behavior%20Modeling%20for%20Click-Through%20Rate%20Prediction%5B2019%5D.pdf) 26 | >>...Later Building... 
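The p.s. at the end of this README notes that DIEN's attention-controlled update gate (AUGRU) is not truly implemented here, and that attention weights are instead applied directly to the hidden states. A minimal, hypothetical sketch of that simplification (standalone Keras/TF2 code with toy shapes, not the actual layer from this repo):

```python
import tensorflow as tf

# toy tensors: [batch, seq_len, hidden] behavior states and per-step attention scores
gru_states = tf.random.normal([4, 90, 8])
att_scores = tf.nn.softmax(tf.random.normal([4, 90, 1]), axis=1)

# simplification used in place of AUGRU's attention-controlled update gate:
# scale each hidden state by its attention weight, then summarize with a plain GRU
weighted_states = gru_states * att_scores            # weight * hidden_state
interest_state = tf.keras.layers.GRU(units=8)(weighted_states)
print(interest_state.shape)                          # (4, 8)
```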
27 | 28 | >[3]Next Building 29 | >>1.[reading][[LSM]Lifelong Sequential Modeling with Personalized Memorization for User Response Prediction[2019].pdf]() 30 | >>2.[building][[DSTN]Deep Spatio-Temporal Neural Networks for Click-Through Rate Prediction[2019].pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/behavior/%5BDSTN%5DDeep%20Spatio-Temporal%20Neural%20Networks%20for%20Click-Through%20Rate%20Prediction%5B2019%5D.pdf) 31 | >>3.[reading][[FiBiNET]Combining Feature Importance and Bilinear featureInteraction for Click-Through Rate Predict[2019].pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/Next%20Read/%5BFiBiNET%5DCombining%20Feature%20Importance%20and%20Bilinear%20featureInteraction%20for%20Click-Through%20Rate%20Predict%5B2019%5D.pdf) 32 | >>4.[building][[SIM]Search-based User Interest Modeling with Lifelong Sequential Behavior Data for Click-Through Rate Prediction[2020].pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/behavior/%5BSIM%5DSearch-based%20User%20Interest%20Modeling%20with%20Lifelong%20Sequential%20Behavior%20Data%20for%20Click-Through%20Rate%20Prediction%5B2020%5D.pdf) 33 | >>...... 34 | 35 | p.s 36 | 1.DIEN: the update-gate control described in the paper is not actually implemented. In Keras 37 | I only adapted the standard LSTM, and reworking the CuDNN LSTM is rather troublesome, so here the 38 | attention weight is simply multiplied with the hidden state (weight*hidden_state) 39 | 2.[building] The faster mult-attention[1] will become the default later on, and a hash option will be provided for the product-related parts 40 | 41 | p.s These are not exact reproductions: I have no machine at home right now. The logic should be largely sound; only partially sampled data was used to check that the models run end to end. Feedback and discussion are welcome. 42 | 43 | 44 | [[1][REFORMER] THE EFFICIENT TRANSFORMER[2020].pdf](https://github.com/TIXhjq/CTR_Function/blob/master/paper/behavior/%5BREFORMER%5D%20THE%20EFFICIENT%20TRANSFORMER%5B2020%5D.pdf) -------------------------------------------------------------------------------- /kon/model/ctr_model/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # _*_ coding:utf-8 _*_ 3 | '''================================= 4 | @Author :tix_hjq 5 | @Date :2020/5/29 下午4:01 6 | @File :__init__.py.py 7 | =================================''' 8 | import pandas as pd 9 | import warnings 10 | import os 11 | from kon.model.feature_eng.feature_transform import feature_tool 12 | from kon.model.feature_eng.base_model import base_model 13 | 14 | warnings.filterwarnings("ignore") 15 | pd.set_option('display.max_columns', None) 16 | pd.set_option('display.max_rows', None) 17 | pd.set_option('max_colwidth', 100) 18 | 19 | print(os.getcwd()) 20 | #---------------------------------------------------- 21 | data_folder = '../../data/' 22 | origin_data_folder = data_folder + 'origin_data/' 23 | submit_data_folder = data_folder + 'submit_data/' 24 | eda_data_folder = data_folder + 'eda_data/' 25 | fea_data_folder = data_folder + 'fea_data/' 26 | #----------------------------------------------------------------- 27 | model_tool = base_model(submit_data_folder) 28 | fea_tool = feature_tool(fea_data_folder) 29 | #----------------------------------------------------------------- -------------------------------------------------------------------------------- /kon/model/ctr_model/layer/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # _*_ coding:utf-8 _*_ 3 | '''================================= 4 | @Author :tix_hjq 5 | @Date :2020/5/29 下午4:01 6 | @File :__init__.py.py 7 | =================================''' -------------------------------------------------------------------------------- /kon/model/ctr_model/layer/behavior_layer/__init__.py:
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # _*_ coding:utf-8 _*_ 3 | '''================================= 4 | @Author :tix_hjq 5 | @Date :2020/6/15 上午11:38 6 | @File :__init__.py.py 7 | =================================''' 8 | import pandas as pd 9 | import warnings 10 | import os 11 | from kon.model.feature_eng.feature_transform import feature_tool 12 | from kon.model.feature_eng.base_model import base_model 13 | 14 | warnings.filterwarnings("ignore") 15 | pd.set_option('display.max_columns', None) 16 | pd.set_option('display.max_rows', None) 17 | pd.set_option('max_colwidth', 100) 18 | 19 | print(os.getcwd()) 20 | #---------------------------------------------------- 21 | data_folder = '../../data/' 22 | origin_data_folder = data_folder + 'origin_data/' 23 | submit_data_folder = data_folder + 'submit_data/' 24 | eda_data_folder = data_folder + 'eda_data/' 25 | fea_data_folder = data_folder + 'fea_data/' 26 | #----------------------------------------------------------------- 27 | model_tool = base_model(submit_data_folder) 28 | fea_tool = feature_tool(fea_data_folder) 29 | #----------------------------------------------------------------- -------------------------------------------------------------------------------- /kon/model/ctr_model/layer/core_layer/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # _*_ coding:utf-8 _*_ 3 | '''================================= 4 | @Author :tix_hjq 5 | @Date :2020/6/15 上午11:40 6 | @File :__init__.py.py 7 | =================================''' 8 | import pandas as pd 9 | import warnings 10 | import os 11 | from kon.model.feature_eng.feature_transform import feature_tool 12 | from kon.model.feature_eng.base_model import base_model 13 | 14 | warnings.filterwarnings("ignore") 15 | pd.set_option('display.max_columns', None) 16 | pd.set_option('display.max_rows', None) 17 | pd.set_option('max_colwidth', 100) 18 | 19 | print(os.getcwd()) 20 | #---------------------------------------------------- 21 | data_folder = '../../data/' 22 | origin_data_folder = data_folder + 'origin_data/' 23 | submit_data_folder = data_folder + 'submit_data/' 24 | eda_data_folder = data_folder + 'eda_data/' 25 | fea_data_folder = data_folder + 'fea_data/' 26 | #----------------------------------------------------------------- 27 | model_tool = base_model(submit_data_folder) 28 | fea_tool = feature_tool(fea_data_folder) 29 | #----------------------------------------------------------------- -------------------------------------------------------------------------------- /kon/model/ctr_model/layer/core_layer/core_layer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # _*_ coding:utf-8 _*_ 3 | '''================================= 4 | @Author :tix_hjq 5 | @Date :2020/5/3 上午11:41 6 | @File :core_layer.py 7 | =================================''' 8 | import tensorflow as tf 9 | from tensorflow.keras.initializers import glorot_uniform 10 | import pandas as pd 11 | import warnings 12 | import os 13 | from kon.model.feature_eng.feature_transform import feature_tool 14 | from kon.model.feature_eng.base_model import base_model 15 | 16 | warnings.filterwarnings("ignore") 17 | pd.set_option('display.max_columns', None) 18 | pd.set_option('display.max_rows', None) 19 | pd.set_option('max_colwidth', 100) 20 | 21 | print(os.getcwd()) 22 | 
#---------------------------------------------------- 23 | data_folder='../../data/' 24 | origin_data_folder=data_folder+'origin_data/' 25 | submit_data_folder=data_folder+'submit_data/' 26 | eda_data_folder=data_folder+'eda_data/' 27 | fea_data_folder=data_folder+'fea_data/' 28 | #----------------------------------------------------------------- 29 | model_tool=base_model(submit_data_folder) 30 | fea_tool=feature_tool(fea_data_folder) 31 | #----------------------------------------------------------------- 32 | class StackLayer(tf.keras.layers.Layer): 33 | ''' 34 | support: 35 | concat(flatten) 36 | ''' 37 | def __init__(self,use_flat=True,axis=None): 38 | super(StackLayer, self).__init__() 39 | if axis: 40 | self.concat = tf.keras.layers.Concatenate(axis=axis) 41 | else: 42 | self.concat = tf.keras.layers.Concatenate() 43 | self.use_flat=use_flat 44 | 45 | def build(self, input_shape): 46 | super(StackLayer, self).build(input_shape) 47 | self.flat = [tf.keras.layers.Flatten(name='stack_flatten_{}'.format(str(i))) for i in range(len(input_shape))] 48 | 49 | def call(self, inputs, **kwargs): 50 | if self.use_flat: 51 | inputs=[flat_(input_) for input_,flat_ in zip(inputs,self.flat)] 52 | if len(inputs)==1: 53 | return inputs[0] 54 | else: 55 | return self.concat(inputs) 56 | 57 | 58 | class ScoreLayer(tf.keras.layers.Layer): 59 | def __init__(self,use_add=False,use_inner=False,use_global=False,seed=2020): 60 | from kon.model.ctr_model.layer.interactive_layer.interactive_layer import InnerLayer 61 | super(ScoreLayer, self).__init__() 62 | self.use_add=use_add 63 | self.add=tf.keras.layers.Add() 64 | self.activate=tf.keras.layers.Activation('sigmoid') 65 | self.use_inner=use_inner 66 | self.inner=InnerLayer(use_inner=True) 67 | self.use_global=use_global 68 | self.seed=seed 69 | 70 | def build(self, input_shape): 71 | super(ScoreLayer, self).build(input_shape) 72 | if self.use_global: 73 | self.global_bias=self.add_weight(shape=(1,),initializer=glorot_uniform(self.seed)) 74 | 75 | def call(self, inputs, **kwargs): 76 | if self.use_add: 77 | inputs=self.add(inputs) 78 | if self.use_global: 79 | inputs=self.add([inputs,self.global_bias]) 80 | if self.use_inner: 81 | inputs=self.inner(inputs) 82 | 83 | output=self.activate(inputs) 84 | return output 85 | 86 | class MergeScoreLayer(tf.keras.layers.Layer): 87 | def __init__(self,use_merge:bool=True,output_dim=2): 88 | super(MergeScoreLayer, self).__init__() 89 | self.concat=StackLayer() 90 | self.dense=tf.keras.layers.Dense(units=output_dim,activation='softmax') 91 | self.use_merge=use_merge 92 | 93 | def build(self, input_shape): 94 | super(MergeScoreLayer, self).build(input_shape) 95 | 96 | def call(self, inputs, **kwargs): 97 | if self.use_merge: 98 | inputs=self.concat(inputs) 99 | x=self.dense(inputs) 100 | return x 101 | 102 | class HiddenLayer(tf.keras.layers.Layer): 103 | ''' 104 | notice: 105 | can to replace dense,to use other method to cal 106 | e.g:can to mult-head-attention achieve autoint 107 | Dnn core: 108 | hidden achieve 109 | In feature, to drop it 110 | ''' 111 | def __init__(self,hidden_units:int,use_bn:bool=True,seed=2020,l2_reg=0,other_dense=None): 112 | super(HiddenLayer, self).__init__() 113 | self.dense=tf.keras.layers.Dense( 114 | units=hidden_units,kernel_initializer=glorot_uniform(seed=seed), 115 | bias_initializer=glorot_uniform(seed=seed),kernel_regularizer=tf.keras.regularizers.l2(l2_reg) 116 | ) 117 | if other_dense: 118 | self.dense=other_dense 119 | self.bn=tf.keras.layers.BatchNormalization() 120 | 
self.use_bn=use_bn 121 | 122 | def build(self, input_shape): 123 | super(HiddenLayer, self).build(input_shape) 124 | 125 | def call(self, inputs, **kwargs): 126 | x=self.dense(inputs) 127 | if self.use_bn: 128 | x=self.bn(x) 129 | return x,inputs 130 | 131 | class ResActivateLayer(tf.keras.layers.Layer): 132 | ''' 133 | notice: 134 | res layer activate,support ln,bn... 135 | ''' 136 | def __init__(self,use_bn,use_ln,hidden_activate): 137 | super(ResActivateLayer, self).__init__() 138 | self.use_ln = use_ln 139 | self.use_bn = use_bn 140 | self.ln = tf.keras.layers.LayerNormalization() 141 | self.bn = tf.keras.layers.BatchNormalization() 142 | self.active = hidden_activate 143 | 144 | 145 | def build(self, input_shape): 146 | super(ResActivateLayer, self).build(input_shape) 147 | 148 | def call(self, inputs, **kwargs): 149 | if self.use_bn: 150 | inputs = self.bn(inputs) 151 | if self.use_ln: 152 | inputs = self.ln(inputs) 153 | 154 | x = self.active(inputs) 155 | 156 | return x 157 | 158 | 159 | class DnnLayer(tf.keras.layers.Layer): 160 | def __init__(self,hidden_units:list=None,l2_reg=0,hidden_activate=tf.keras.layers.ReLU(),use_bn:bool=False,res_unit=1, 161 | output_dim=-1,seed=2020,other_dense=None,use_ln:bool=False,use_flatten=False,**kwargs): 162 | ''' 163 | notice: 164 | dense of dnn can to replace other layer, 165 | e.g:mult head atten(autoInt), 166 | to_replace:other_dense,succ to replace. 167 | 168 | :param hidden_units:please make sure to need units list 169 | when use other dense,need to input it too. 170 | e.g need 3 hidden,but use other dense==>[[],[],[]] 171 | num is not import,shape is very import 172 | 173 | :param res_unit:res add skip num 174 | 175 | :param activate:hidden activate 176 | Dnn core: 177 | supports auto bn 178 | ''' 179 | super(DnnLayer, self).__init__(**kwargs) 180 | self.hidden_list=other_dense 181 | if not other_dense: 182 | self.hidden_list=[HiddenLayer(hidden_units=dim,use_bn=False,other_dense=other_dense)for dim in hidden_units] 183 | self.activate=hidden_activate 184 | self.activate=[ResActivateLayer(use_bn=use_bn,use_ln=use_ln,hidden_activate=hidden_activate) for idx_ in range(len(self.hidden_list))] 185 | self.add=tf.keras.layers.Add() 186 | self.seed=2020 187 | self.output_dim=output_dim 188 | self.res_unit=res_unit 189 | if output_dim!=-1: 190 | self.logit_layer=tf.keras.layers.Dense( 191 | units=output_dim,kernel_initializer=glorot_uniform(seed=seed), 192 | bias_initializer=glorot_uniform(seed=seed) 193 | ) 194 | if use_flatten: 195 | self.flat=tf.keras.layers.Flatten() 196 | self.use_flatten=use_flatten 197 | 198 | def build(self, input_shape): 199 | super(DnnLayer, self).build(input_shape) 200 | 201 | def call(self, inputs,**kwargs): 202 | x=inputs 203 | res=[[],[]] 204 | for idx_,hidden_layer in enumerate(self.hidden_list): 205 | [x,ori]=hidden_layer(x) 206 | if idx_==0: 207 | res=[ori,x] 208 | if (idx_+1)%self.res_unit!=0 or self.res_unit==1: 209 | res[-1]=x 210 | if (idx_+1)%self.res_unit==0: 211 | try: 212 | x=self.add(res) 213 | except ValueError: 214 | x=res[-1] 215 | 216 | x=self.activate[idx_](x) 217 | if (idx_+1)%self.res_unit==0: 218 | res[0]=x 219 | 220 | if self.use_flatten: 221 | x = self.flat(x) 222 | 223 | if self.output_dim!=-1: 224 | x=self.logit_layer(x) 225 | 226 | return x 227 | 228 | class IntraViewPoolingLayer(tf.keras.layers.Layer): 229 | def __init__(self): 230 | super(IntraViewPoolingLayer, self).__init__() 231 | 232 | def build(self, input_shape): 233 | super(IntraViewPoolingLayer, self).build(input_shape) 234 | 235 | 
def call(self, inputs, **kwargs): 236 | output=tf.expand_dims(tf.reduce_mean(inputs,axis=1),axis=1) 237 | 238 | return output 239 | 240 | class AlignLayer(tf.keras.layers.Layer): 241 | ''' 242 | format dim,if [a,b.,.] dim not eq, 243 | format to [a,b...] higher dim 244 | ''' 245 | def __init__(self): 246 | super(AlignLayer, self).__init__() 247 | 248 | def build(self, input_shape): 249 | super(AlignLayer, self).build(input_shape) 250 | dim_list=[i[-1] for i in input_shape] 251 | max_dim=max(dim_list) 252 | self.format_dense=[tf.keras.layers.Dense( 253 | units=max_dim) if iembedding info drop 115 | op:sum(embedding_list),but it represent ? 116 | ''' 117 | super(OPnnLayer, self).__init__() 118 | self.seed=seed 119 | self.use_reduce=use_reduce 120 | self.outer=InnerLayer(use_inner=False,perm=[0,2,1],mod=(1,2)) 121 | self.add=tf.keras.layers.Add() 122 | self.use_flatten=use_flatten 123 | 124 | def build(self, input_shape): 125 | super(OPnnLayer, self).build(input_shape) 126 | fea_size=len(input_shape) 127 | if self.use_reduce: 128 | fea_size=1 129 | self.flat=[tf.keras.layers.Flatten() for i in range(fea_size)] 130 | 131 | def call(self, inputs, **kwargs): 132 | if self.use_reduce: 133 | sum_inputs=self.add(inputs) 134 | # sum_inputs=tf.expand_dims(sum_inputs,axis=-1) 135 | outer_list=self.outer([sum_inputs,sum_inputs]) 136 | else: 137 | # inputs=[tf.expand_dims(input_,axis=-1) for input_ in inputs] 138 | outer_list = self.outer(inputs) 139 | 140 | if self.use_flatten: 141 | outer_list=[flat_(outer_) for outer_,flat_ in zip(outer_list,self.flat)] 142 | 143 | return outer_list 144 | 145 | class FmLayer(tf.keras.layers.Layer): 146 | def __init__(self,use_inner:bool=True,mod=1,use_add=True,**kwargs): 147 | ''' 148 | :param mod: 149 | 0.output matrix 150 | 1.output matrix result 151 | ''' 152 | super(FmLayer, self).__init__(**kwargs) 153 | self.cross=InnerLayer(use_inner=use_inner,mod=mod,use_add=use_add) 154 | self.add = tf.keras.layers.Add() 155 | self.use_add=use_add 156 | 157 | def build(self,input_shape): 158 | super(FmLayer, self).build(input_shape) 159 | self.cross.build(input_shape) 160 | 161 | def call(self, inputs, **kwargs): 162 | ''' 163 | :param inputs:[cross_embed,linear_embed] 164 | ''' 165 | cross = self.cross(inputs[0]) 166 | output = self.add([cross]+inputs[1]) 167 | if self.use_add: 168 | return output 169 | else: 170 | return cross+inputs[1] 171 | 172 | class LinearLayer(tf.keras.layers.Layer): 173 | def __init__(self,initializer:str='random_normal'): 174 | super(LinearLayer,self).__init__() 175 | self.initalizer=initializer 176 | 177 | def build(self, input_shape): 178 | super(LinearLayer, self).build(input_shape) 179 | self.w = self.add_weight(shape=(input_shape[-1],1), 180 | initializer=self.initalizer, 181 | trainable=True) 182 | self.b = self.add_weight(shape=(1,), 183 | initializer=self.initalizer, 184 | trainable=True) 185 | 186 | def call(self,inputs,**kwargs): 187 | return [tf.tensordot(a=input,b=self.w,axes=1)+self.b for input in inputs] 188 | 189 | class SparseEmbed(tf.keras.layers.Layer): 190 | ''' 191 | embedding core: 192 | supports sparse embed & linear 193 | supports: 194 | flatten,add 195 | ''' 196 | def __init__(self,sparse_info:list,is_linear=False,use_flatten=True,use_add=False,seed=2020,support_masking=True,mask_zero=False): 197 | super(SparseEmbed,self).__init__() 198 | self.sparse_info=sparse_info 199 | self.flatten=None 200 | self.supports_masking=support_masking 201 | self.is_linear = is_linear 202 | self.mask_zero=mask_zero 203 | self.use_add = use_add 
204 | self.seed=seed 205 | 206 | if use_flatten: 207 | self.flatten=[tf.keras.layers.Flatten()for i in sparse_info] 208 | if use_add: 209 | self.add=tf.keras.layers.Add() 210 | 211 | def build(self, input_shape): 212 | if not self.is_linear: 213 | self.embed=[tf.keras.layers.Embedding( 214 | name=info_.fea_name,input_dim=info_.word_size,output_dim=info_.cross_unit, 215 | mask_zero=info_.mask_zero,embeddings_initializer=glorot_uniform(seed=self.seed), 216 | input_length=info_.input_length,trainable=info_.is_trainable,weights=info_.pre_weight, 217 | embeddings_regularizer=tf.keras.regularizers.l2(info_.emb_reg) 218 | ) if info_.cross_unit!=0 else [] for info_ in self.sparse_info] 219 | else: 220 | self.embed=[tf.keras.layers.Embedding( 221 | name=info_.fea_name,input_dim=info_.word_size,output_dim=info_.linear_unit 222 | )for info_ in self.sparse_info] 223 | super(SparseEmbed, self).build(input_shape) 224 | 225 | def call(self,inputs,**kwargs): 226 | 227 | embed_list = [emb_(input_) if info_.cross_unit != 0 else input_ for emb_, input_, info_ in 228 | zip(self.embed ,inputs, self.sparse_info)] 229 | 230 | if self.flatten: 231 | embed_list=[flat_(embed_) for flat_,embed_ in zip(self.flatten,embed_list)] 232 | 233 | if self.use_add: 234 | embed_list=self.add(embed_list) 235 | 236 | self.embed_list=embed_list 237 | 238 | if self.mask_zero: 239 | return embed_list,\ 240 | [emb._keras_mask if info_.cross_unit!=0 else [] for emb,info_ in zip(embed_list,self.sparse_info)] 241 | else: 242 | return embed_list 243 | 244 | def compute_mask(self, inputs, mask=None): 245 | if not self.mask_zero: 246 | return None 247 | return [embed._keras_mask for embed in self.embed_list] 248 | 249 | 250 | class CrossLayer(tf.keras.layers.Layer): 251 | ''' 252 | DCN core: 253 | x^k=(x^k-1*x0)+b+x0(Recursive Format) 254 | ''' 255 | def __init__(self,cross_hidden=3,seed=2020,**kwargs): 256 | super(CrossLayer, self).__init__(**kwargs) 257 | self.outer=InnerLayer(use_inner=False,mod=(-2,-1),perm=([0,2,1])) 258 | self.cross_hidden=cross_hidden 259 | self.seed=seed 260 | self.dot_=[tf.keras.layers.Dot(axes=1,name='Dot_{}'.format(str(i))) for i in range(cross_hidden)] 261 | self.add_=[tf.keras.layers.Add(name='Add_{}'.format(str(i)))for i in range(cross_hidden)] 262 | 263 | 264 | def build(self, input_shape): 265 | self.kernel=[ 266 | self.add_weight(name='outer_weight_{}'.format(str(i)), 267 | shape=[input_shape[-1],1],initializer=glorot_uniform(seed=self.seed) 268 | )for i in range(self.cross_hidden)] 269 | self.bias=[ 270 | self.add_weight(name='outer_bias_{}'.format(str(i)), 271 | shape=[input_shape[-1],1],initializer=tf.keras.initializers.zeros() 272 | )for i in range(self.cross_hidden)] 273 | super(CrossLayer, self).build(input_shape) 274 | 275 | def call(self, inputs, **kwargs): 276 | inputs=tf.expand_dims(inputs,axis=-1) 277 | pre_inputs=inputs 278 | for i in range(self.cross_hidden): 279 | pre_inputs=tf.keras.backend.batch_dot(inputs,tf.keras.backend.dot( 280 | tf.transpose(pre_inputs,perm=[0,2,1]),self.kernel[i]))+pre_inputs+self.bias[i] 281 | 282 | return pre_inputs 283 | 284 | 285 | class CIN(tf.keras.layers.Layer): 286 | ''' 287 | XDeep core: 288 | x^k=sum[w()] 289 | x1->....x^k---> RNN(Recursive Format) 290 | ==hk*m*d--->con1D--->hk*d==>x^k 291 | final_output=concat(all feature map sum) 292 | 293 | feature map: 294 | row=D,col=m*h 295 | ''' 296 | def __init__(self, conv_size=None, output_dim=1): 297 | super(CIN, self).__init__() 298 | if conv_size is None: 299 | conv_size = [200, 200, 200] 300 | 
self.conv_size=conv_size 301 | self.concat=tf.keras.layers.Concatenate() 302 | self.output_dim=output_dim 303 | if output_dim==1: 304 | self.logit_layer=tf.keras.layers.Dense(1) 305 | 306 | def build(self, input_shape): 307 | super(CIN, self).build(input_shape) 308 | self.hidden_conv=[tf.keras.layers.Conv1D(size,1) for size in self.conv_size] 309 | 310 | def call(self, inputs, **kwargs): 311 | x0 = tf.split(inputs, [1] * inputs.shape[-1], -1) 312 | pre_=x0 313 | sum_pooling_list=[] 314 | 315 | for conv_ in self.hidden_conv: 316 | z = tf.matmul(x0, pre_, transpose_b=True) 317 | z = tf.transpose(z, perm=[1, 0, 3, 2]) 318 | z=tf.reshape(z,[-1,z.shape[1],z.shape[2]*z.shape[3]]) 319 | z=conv_(z) 320 | pre_ = tf.transpose(z,[0,2,1]) 321 | pre_=tf.split(pre_, [1] * pre_.shape[-1], -1) 322 | sum_pooling_list.append(tf.reduce_sum(z, axis=-1)) 323 | output=self.concat(sum_pooling_list) 324 | if self.output_dim==1: 325 | output=self.logit_layer(output) 326 | 327 | return output 328 | 329 | class AttentionBaseLayer(tf.keras.layers.Layer): 330 | ''' 331 | AFM core: 332 | base attention 333 | advise to go directly DIN 334 | ''' 335 | def __init__(self,attention_dim=4,seed=2020,output_dim=1): 336 | super(AttentionBaseLayer, self).__init__() 337 | self.add=tf.keras.layers.Add() 338 | self.atten_dim=attention_dim 339 | self.seed=seed 340 | self.single_mlp = tf.keras.layers.Dense(1, 'relu', use_bias=False, kernel_initializer=glorot_uniform(self.seed)) 341 | self.single_softmax=tf.keras.layers.Activation('softmax') 342 | self.output_layer=tf.keras.layers.Dense(output_dim) 343 | 344 | def build(self, input_shape): 345 | super(AttentionBaseLayer, self).build(input_shape) 346 | 347 | self.kernel_w=self.add_weight( 348 | name='single_score_w', 349 | shape=(input_shape[0][-1],self.atten_dim), 350 | initializer=glorot_uniform(seed=self.seed) 351 | ) 352 | self.kernel_b=self.add_weight( 353 | name='single_score_b', 354 | shape=(self.atten_dim,), 355 | initializer=glorot_uniform(seed=self.seed) 356 | ) 357 | 358 | 359 | def call(self, inputs, **kwargs): 360 | inputs=tf.concat(inputs,axis=1) 361 | score_=self.single_mlp(tf.add(tf.keras.backend.dot(inputs,self.kernel_w),self.kernel_b)) 362 | score_w=self.single_softmax(score_) 363 | atten_inputs=tf.reduce_sum(score_w*inputs,axis=1) 364 | output=self.output_layer(atten_inputs) 365 | 366 | return output 367 | 368 | -------------------------------------------------------------------------------- /kon/model/ctr_model/model/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # _*_ coding:utf-8 _*_ 3 | '''================================= 4 | @Author :tix_hjq 5 | @Date :2020/5/29 下午4:01 6 | @File :__init__.py.py 7 | =================================''' -------------------------------------------------------------------------------- /kon/model/cvr_model/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # _*_ coding:utf-8 _*_ 3 | '''================================= 4 | @Author :tix_hjq 5 | @Date :2020/9/26 下午6:23 6 | @File :__init__.py.py 7 | @email :hjq1922451756@gmail.com or 1922451756@qq.com 8 | =================================''' 9 | from sklearn.model_selection import KFold, StratifiedKFold 10 | from sklearn.metrics import mean_squared_error as mse 11 | from sklearn.preprocessing import LabelEncoder 12 | from sklearn.metrics import f1_score, r2_score 13 | from hyperopt import fmin, tpe, hp, partial 14 | from numpy.random 
import random, shuffle 15 | import matplotlib.pyplot as plt 16 | from pandas import DataFrame 17 | import tensorflow as tf 18 | from tqdm import tqdm 19 | from PIL import Image 20 | import lightgbm as lgb 21 | import networkx as nx 22 | import pandas as pd 23 | import numpy as np 24 | import warnings 25 | import cv2 26 | import os 27 | import gc 28 | import re 29 | import datetime 30 | import sys 31 | from kon.model.embedding.setence_model import * 32 | from kon.model.feature_eng.feature_transform import feature_tool 33 | from kon.model.feature_eng.base_model import base_model 34 | from kon.model.ctr_model.model.models import * -------------------------------------------------------------------------------- /kon/model/cvr_model/layer/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # _*_ coding:utf-8 _*_ 3 | '''================================= 4 | @Author :tix_hjq 5 | @Date :2020/9/26 下午6:24 6 | @File :__init__.py.py 7 | @email :hjq1922451756@gmail.com or 1922451756@qq.com 8 | =================================''' 9 | from sklearn.model_selection import KFold, StratifiedKFold 10 | from sklearn.metrics import mean_squared_error as mse 11 | from sklearn.preprocessing import LabelEncoder 12 | from sklearn.metrics import f1_score, r2_score 13 | from hyperopt import fmin, tpe, hp, partial 14 | from numpy.random import random, shuffle 15 | import matplotlib.pyplot as plt 16 | from pandas import DataFrame 17 | import tensorflow as tf 18 | from tqdm import tqdm 19 | from PIL import Image 20 | import lightgbm as lgb 21 | import networkx as nx 22 | import pandas as pd 23 | import numpy as np 24 | import warnings 25 | import cv2 26 | import os 27 | import gc 28 | import re 29 | import datetime 30 | import sys 31 | from kon.model.embedding.setence_model import * 32 | from kon.model.feature_eng.feature_transform import feature_tool 33 | from kon.model.feature_eng.base_model import base_model 34 | from kon.model.ctr_model.model.models import * -------------------------------------------------------------------------------- /kon/model/cvr_model/model/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # _*_ coding:utf-8 _*_ 3 | '''================================= 4 | @Author :tix_hjq 5 | @Date :2020/9/26 下午6:24 6 | @File :__init__.py.py 7 | @email :hjq1922451756@gmail.com or 1922451756@qq.com 8 | =================================''' 9 | from sklearn.model_selection import KFold, StratifiedKFold 10 | from sklearn.metrics import mean_squared_error as mse 11 | from sklearn.preprocessing import LabelEncoder 12 | from sklearn.metrics import f1_score, r2_score 13 | from hyperopt import fmin, tpe, hp, partial 14 | from numpy.random import random, shuffle 15 | import matplotlib.pyplot as plt 16 | from pandas import DataFrame 17 | import tensorflow as tf 18 | from tqdm import tqdm 19 | from PIL import Image 20 | import lightgbm as lgb 21 | import networkx as nx 22 | import pandas as pd 23 | import numpy as np 24 | import warnings 25 | import cv2 26 | import os 27 | import gc 28 | import re 29 | import datetime 30 | import sys 31 | from kon.model.embedding.setence_model import * 32 | from kon.model.feature_eng.feature_transform import feature_tool 33 | from kon.model.feature_eng.base_model import base_model 34 | from kon.model.ctr_model.model.models import * -------------------------------------------------------------------------------- 
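For reference, the recursion that the CrossLayer above describes as "x^k=(x^k-1*x0)+b+x0 (Recursive Format)" can be written out step by step. The function below is an illustrative, hand-rolled version operating on plain (batch, d) tensors, not the repo's CrossLayer; the names and shapes are simplified assumptions.

```python
import tensorflow as tf

def cross_step(x0, x_prev, w, b):
    """One DCN cross step: x_k = x0 * (x_{k-1} . w) + b + x_{k-1}.
    x0, x_prev: (batch, d); w, b: (d,). Each step adds one more
    multiplicative interaction order with only O(d) parameters."""
    scalar = tf.tensordot(x_prev, w, axes=1)[:, None]   # (batch, 1)
    return x0 * scalar + b + x_prev

# stack a few steps, mirroring cross_hidden=3
x0 = tf.random.normal([2, 4])
w = [tf.random.normal([4]) for _ in range(3)]
b = [tf.zeros([4]) for _ in range(3)]
x = x0
for k in range(3):
    x = cross_step(x0, x, w[k], b[k])
print(x.shape)  # (2, 4)
```

Stacking k such steps reproduces the bounded-degree feature crosses of DCN while keeping only two d-dimensional parameter vectors per step.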
/kon/model/embedding/.idea/embedding.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | -------------------------------------------------------------------------------- /kon/model/embedding/.idea/encodings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /kon/model/embedding/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /kon/model/embedding/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /kon/model/embedding/README.md: -------------------------------------------------------------------------------- 1 | You will notice that this part looks a lot like 浅梦's work: it dates from when I was just getting into this area and was written by following his code 2 | It will be refactored in the future, since my understanding was still a bit fuzzy back then. This part mainly contains graph-embedding methods, and more GNN-style models will be covered later 3 | 4 | ![image](https://pic2.zhimg.com/80/v2-c94c026c9a21aa87a7968088a214f66d_1440w.jpg) -------------------------------------------------------------------------------- /kon/model/embedding/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # _*_ coding:utf-8 _*_ 3 | '''================================= 4 | @Author :tix_hjq 5 | @Date :2020/5/29 下午4:01 6 | @File :__init__.py.py 7 | =================================''' 8 | -------------------------------------------------------------------------------- /kon/model/embedding/logs/0/best_weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/logs/0/best_weights.h5 -------------------------------------------------------------------------------- /kon/model/embedding/logs/0/events.out.tfevents.1564644409.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/logs/0/events.out.tfevents.1564644409.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/logs/0/events.out.tfevents.1565180032.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/logs/0/events.out.tfevents.1565180032.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/logs/0/events.out.tfevents.1565180080.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/logs/0/events.out.tfevents.1565180080.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/model_test.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | from kon.model.embedding.setence_model.deepwalk import DeepWalk 3 | from kon.model.embedding.setence_model.line import Line 4 | from
kon.model.embedding.setence_model.node2vec import node2vec 5 | from kon.model.embedding.setence_model.sdne import sdne 6 | from kon.model.embedding.util.util_tool import read_graph 7 | from kon.model.embedding.util.evaluate import evaluate_tools 8 | 9 | def deep_walk_run(edgelist_path,is_evluate=False): 10 | Graph = read_graph(edgelist_path) 11 | 12 | deepwalk = DeepWalk( 13 | Graph=Graph, 14 | per_vertex=80, 15 | walk_length=10, 16 | window_size=5, 17 | dimension_size=8, 18 | work=4 19 | ) 20 | embeddings = deepwalk.transform() 21 | if is_evluate: 22 | eval = evaluate_tools(embeddings=embeddings, label_path='wiki/Wiki_labels.txt') 23 | eval.plot_embeddings() 24 | return embeddings 25 | 26 | def line_run(): 27 | from kon.model.embedding.util.util_tool import read_graph 28 | import os 29 | print(os.getcwd()) 30 | Graph = read_graph('wiki/Wiki_edgelist.txt') 31 | line = Line( 32 | Graph=Graph, 33 | dimension_size=128, 34 | per_vertex=100, 35 | walk_length=10, 36 | window_size=5, 37 | work=1, 38 | negative_ratio=1, 39 | batch_size=128, 40 | log_dir='logs/0/', 41 | epoch=100, 42 | ) 43 | embeddings = line.transform() 44 | from kon.model.embedding.util.evaluate import evaluate_tools 45 | tool = evaluate_tools(embeddings, label_path='wiki/Wiki_labels.txt') 46 | tool.plot_embeddings() 47 | 48 | def node2vec_run(): 49 | Graph = read_graph('wiki/Wiki_edgelist.txt') 50 | 51 | node_vec = node2vec( 52 | Graph=Graph, 53 | per_vertex=80, 54 | walk_length=10, 55 | window_size=5, 56 | dimension_size=128, 57 | work=1, 58 | p=0.25, 59 | q=4 60 | ) 61 | 62 | embeddings = node_vec.transform() 63 | eval_tool = evaluate_tools(embeddings, label_path='wiki/Wiki_labels.txt') 64 | eval_tool.plot_embeddings() 65 | 66 | def sdne_run(): 67 | Graph = read_graph('wiki/Wiki_edgelist.txt') 68 | sden_model = sdne( 69 | Graph=Graph, 70 | dimension_size=128, 71 | per_vertex=100, 72 | walk_length=10, 73 | window_size=5, 74 | work=1, 75 | beta=5, 76 | alpha=1e-6, 77 | verbose=1, 78 | epochs=1000, 79 | batch_size=512, 80 | log_dir='logs/0/', 81 | hidden_size_list=[256, 128], 82 | l1=1e-5, 83 | l2=1e-4 84 | ) 85 | sden_model.train() 86 | embeddings = sden_model.get_embeddings() 87 | 88 | from kon.model.embedding.util.evaluate import evaluate_tools 89 | eval_tool = evaluate_tools(embeddings, label_path='wiki/Wiki_labels.txt') 90 | eval_tool.plot_embeddings() 91 | 92 | 93 | def model_test(build_name,edgelist_path='wiki/Wiki_edgelist.txt',embedding=8): 94 | if build_name=='deepwalk': 95 | embedding=deep_walk_run(edgelist_path) 96 | elif build_name=='line': 97 | line_run() 98 | elif build_name=='node2vec': 99 | node2vec_run() 100 | elif build_name=='sdne': 101 | sdne_run() 102 | elif build_name=='all': 103 | deep_walk_run(edgelist_path) 104 | line_run() 105 | node2vec_run() 106 | sdne_run() 107 | 108 | return embedding 109 | 110 | if __name__=='__main__': 111 | model_test('deepwalk') 112 | 113 | 114 | -------------------------------------------------------------------------------- /kon/model/embedding/other/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # _*_ coding:utf-8 _*_ 3 | '''================================= 4 | @Author :tix_hjq 5 | @Date :2020/5/29 下午4:01 6 | @File :__init__.py.py 7 | =================================''' 8 | -------------------------------------------------------------------------------- /kon/model/embedding/other/other-collections.py: -------------------------------------------------------------------------------- 1 | # _*_ 
coding:utf-8 _*_ 2 | 3 | from collections import namedtuple 4 | #创建了一个tuple对象 5 | Point=namedtuple('Point',['x','y']) 6 | p=Point(1,2) 7 | print(p.x) 8 | print(p.y) 9 | 10 | 11 | from collections import deque 12 | 13 | #实现了队列的对象 14 | q=deque(['a','b','c']) 15 | q.append('x') 16 | q.appendleft('y') 17 | print(q) 18 | q.pop() 19 | q.popleft() 20 | print(q) 21 | 22 | 23 | from collections import defaultdict 24 | 25 | #为dict的key设置默认值 26 | dd=defaultdict(int) 27 | dd['key1']=dd['key1']+1 28 | print(dd.keys()) 29 | 30 | 31 | from collections import OrderedDict 32 | #OrderDict会按照插入顺序排序,不过并没有看出来和普通的有什么区别 33 | d = dict([('n', 1), ('b', 2), ('c', 3)]) 34 | print(d) 35 | 36 | d=OrderedDict([('n',1), ('b', 2), ('c', 3)]) 37 | print(d) 38 | 39 | 40 | from collections import Counter 41 | #Counter计数器 42 | c=Counter() 43 | for ch in 'programming': 44 | c[ch]=c[ch]+1 45 | 46 | print(c) 47 | 48 | -------------------------------------------------------------------------------- /kon/model/embedding/other/other-networks.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | import networkx as nx 3 | 4 | #建立图 5 | G=nx.Graph() 6 | 7 | #添加节点 8 | G.add_node(1) 9 | G.add_nodes_from([2,3]) 10 | 11 | #一个图添加到另一个图中 12 | # H=nx.path_graph(10) 13 | # G.add_nodes_from(H) 14 | 15 | #添加边 16 | G.add_edge(1,2) 17 | e=(2,3) 18 | G.add_edge(*e) 19 | # G.add_edges_from(H.edges) 20 | 21 | import matplotlib.pyplot as plt 22 | # G=nx.petersen_graph() 23 | nx.draw(G,with_labels=True,font_weight='bold') 24 | plt.show() 25 | 26 | #有向图 27 | DG=nx.DiGraph() 28 | DG.add_weighted_edges_from([(1,2,0.5),(3,1,0.75)]) 29 | print(DG.out_degree(1,weight='weight')) 30 | 31 | -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # _*_ coding:utf-8 _*_ 3 | '''================================= 4 | @Author :tix_hjq 5 | @Date :2020/7/21 上午9:00 6 | @File :__init__.py.py 7 | @email :hjq1922451756@gmail.com or 1922451756@qq.com 8 | =================================''' -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/backone_language_model.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | from gensim.models import Word2Vec 3 | 4 | class language_model(): 5 | def __init__(self,window_size,dimension_size,work): 6 | self.crop_size=int(window_size) 7 | self.unit_size=dimension_size 8 | self.workers=work 9 | 10 | def word2vec_on_train(self,sentence): 11 | model=Word2Vec( 12 | sentences=sentence, 13 | window=self.crop_size, 14 | size=self.unit_size, 15 | sg=1, 16 | hs=0, 17 | workers=self.workers, 18 | iter=3, 19 | min_count=0 20 | ) 21 | 22 | return model 23 | 24 | 25 | -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/backone_optimize.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | import numpy as np 3 | from numpy import random 4 | 5 | class optimize_funcation(): 6 | 7 | def __init__(self): 8 | pass 9 | 10 | def generate_alias_table(self, all_probability): 11 | num_probability=len(all_probability) 12 | 13 | all_probability=list((np.array(all_probability)*num_probability)/np.sum(all_probability)) 14 | 15 | 16 | small, large = [], [] 17 | prab, alias = 
[-1] * num_probability, [-1] * num_probability 18 | 19 | format_count=0 20 | for prob_rank in range(num_probability): 21 | if all_probability[prob_rank] == 1: 22 | prab[prob_rank] = 1 23 | alias[prob_rank] = -1 24 | format_count+=1 25 | elif all_probability[prob_rank] > 1: 26 | large.append(prob_rank) 27 | else: 28 | small.append(prob_rank) 29 | 30 | if format_count==num_probability: 31 | return prab,alias 32 | 33 | while 1: 34 | if len(small)==0: 35 | break 36 | if len(large)==0: 37 | break 38 | small_rank = small.pop() 39 | small_data = all_probability[small_rank] 40 | need_data = 1 - small_data 41 | large_rank = large.pop() 42 | rest_data = all_probability[large_rank] - need_data 43 | 44 | prab[small_rank] = small_data 45 | alias[small_rank] = large_rank 46 | all_probability[large_rank]=rest_data 47 | 48 | if rest_data == 1: 49 | prab[large_rank] = 1 50 | alias[large_rank] = -1 51 | 52 | elif rest_data > 1: 53 | large.append(large_rank) 54 | else: 55 | small.append(large_rank) 56 | 57 | while len(small)!=0: 58 | small_rank=small.pop() 59 | prab[small_rank]=1 60 | while len(large)!=0: 61 | large_rank=large.pop() 62 | prab[large_rank]=1 63 | 64 | return prab, alias 65 | 66 | def alias_sample(self, prab, alias,rank=None): 67 | if rank==None: 68 | rank=int(random.random()*len(prab)) 69 | prab_ = random.random() 70 | if prab_ < prab[rank]: 71 | 72 | return rank 73 | else: 74 | return alias[rank] 75 | def batch_alias_sample(self,prab,alias,rank_list): 76 | all_index=[] 77 | for rank in rank_list: 78 | index=self.alias_sample(prab,alias,rank) 79 | all_index.append(index) 80 | return all_index 81 | 82 | # kon 83 | def gen_prob_dist(self,N): 84 | p = np.random.randint(0, 100, N) 85 | return p / np.sum(p) 86 | 87 | def simulate(self,N=100, k=10000): 88 | 89 | truth = self.gen_prob_dist(N) 90 | 91 | area_ratio = truth * N 92 | prab, alias = self.generate_alias_table(all_probability=area_ratio) 93 | 94 | ans = np.zeros(N) 95 | for _ in range(k): 96 | i = self.alias_sample(alias=alias,prab=prab,rank=_) 97 | 98 | ans[i] += 1 99 | 100 | 101 | return ans / np.sum(ans), truth 102 | 103 | if __name__=='__main__': 104 | tool=optimize_funcation() 105 | tool.simulate() -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/deepwalk.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | from numpy import random 3 | from kon.model.embedding.setence_model.walk_core_model import core_model 4 | from kon.model.embedding.util.evaluate import evaluate_tools 5 | from tqdm import tqdm 6 | from kon.model.embedding.util.util_tool import read_graph 7 | 8 | class DeepWalk(core_model): 9 | 10 | def __init__(self,Graph,per_vertex,walk_length,window_size,dimension_size,work): 11 | super().__init__(Graph,per_vertex,walk_length,window_size,dimension_size,work) 12 | 13 | def deepwalk(self): 14 | sentence_list=[] 15 | 16 | for num in tqdm(range(self.walk_epoch),desc='walk epoch'): 17 | random.shuffle(self.all_nodes) 18 | for vertex in tqdm(self.all_nodes,desc='generator node walk seq'): 19 | sentence_list.append(self.random_walk(start_vertex=vertex)) 20 | 21 | return sentence_list 22 | 23 | def transform(self): 24 | sentence_list=self.deepwalk() 25 | embeddings=self.embdding_train(sentence_list) 26 | return embeddings 27 | 28 | 29 | if __name__=='__main__': 30 | 31 | Graph = read_graph('wiki/Wiki_edgelist.txt') 32 | 33 | deepwalk=DeepWalk( 34 | Graph=Graph, 35 | per_vertex=80, 36 | walk_length=10, 37 | 
window_size=5, 38 | dimension_size=64, 39 | work=4 40 | ) 41 | 42 | embeddings=deepwalk.transform() 43 | print(embeddings.keys()) 44 | print(embeddings.values()) 45 | eval = evaluate_tools(embeddings=embeddings) 46 | eval.plot_embeddings() 47 | 48 | -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/line.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | from kon.model.embedding.setence_model.walk_core_model import core_model 3 | from kon.model.embedding.util.util_tool import get_node_information 4 | from numpy import random 5 | import numpy as np 6 | import math 7 | 8 | class Line(core_model): 9 | 10 | def __init__(self,Graph,per_vertex,walk_length,window_size,dimension_size,work,log_dir,epoch,negative_ratio=0,order='second',batch_size=1024,times=1): 11 | super().__init__(Graph,per_vertex,walk_length,window_size,dimension_size,work) 12 | self.times=times 13 | self.epoch=epoch 14 | self.log_dir=log_dir 15 | self.batch_size=batch_size 16 | self.order=order 17 | self.negative_ratio=negative_ratio 18 | self.idx2node,self.node2idx=get_node_information(self.all_nodes) 19 | self.generate_smapling_table() 20 | 21 | def generate_edge_sampling_table(self): 22 | #edge sampling: avoids the gradient explosion, mentioned in the paper, that large gaps between edge weights can cause 23 | numEdges = self.numEdges 24 | 25 | edges_sum = 0 26 | for edge in self.all_edges: 27 | edges_sum += self.G[edge[0]][edge[1]].get('weight', 1.0) 28 | 29 | #look up the weight of every edge 30 | all_probability = [] 31 | for edge in self.all_edges: 32 | probability = self.G[edge[0]][edge[1]].get('weight', 1.0) * numEdges / edges_sum 33 | all_probability.append(probability) 34 | 35 | self.edge_prab, self.edge_alias = self.optimize_fun.generate_alias_table(all_probability) 36 | 37 | def generate_node_sampling_table(self,power=0.75): 38 | node_degree = np.zeros(self.numNodes) 39 | 40 | #vertex sampling to cut the number of vertices; power=0.75 is the empirical value; the paper turns PageRank-style importance into the vertex out-degree, i.e. the out-degree stands for each vertex's weight 41 | #compute every vertex's out-degree 42 | for edge in self.all_edges: 43 | node_degree[self.node2idx[edge[0]]] += self.G[edge[0]][edge[1]].get('weight', 1.0) 44 | 45 | #damp every out-degree with the power exponent 46 | weights_sum=0 47 | for rank in range(self.numNodes): 48 | weights_sum+=math.pow(node_degree[rank],power) 49 | 50 | #compute every vertex's probability 51 | all_probability=[] 52 | for rank in range(self.numNodes): 53 | probability=float(math.pow(node_degree[rank],power))/weights_sum 54 | all_probability.append(probability) 55 | 56 | self.node_prab, self.node_alias = self.optimize_fun.generate_alias_table(all_probability) 57 | 58 | #build the tables needed by alias sampling 59 | def generate_smapling_table(self,power=0.75): 60 | self.generate_node_sampling_table(power) 61 | self.generate_edge_sampling_table() 62 | 63 | def generator_postive_data(self,data_index,start_index,end_index,edges_index): 64 | rank_list=[] 65 | for rank in range(start_index,end_index): 66 | rank_list.append(data_index[rank]) 67 | 68 | edge_index_list_ = self.optimize_fun.batch_alias_sample( 69 | prab=self.edge_prab, 70 | alias=self.edge_alias, 71 | rank_list=rank_list 72 | ) 73 | 74 | begin_node,end_node=[],[] 75 | 76 | for edge_index_ in edge_index_list_: 77 | begin_node.append(edges_index[edge_index_][0]) 78 | end_node.append(edges_index[edge_index_][1]) 79 | 80 | return begin_node,end_node 81 | 82 | def generator_negative_data(self,begin_node): 83 | rank_list = [] 84 | for i in range(len(begin_node)): 85 | rank_list.append(random.choice(list(range(len(self.node_prab))))) 86 | 87 | end_node = self.optimize_fun.batch_alias_sample( 88 | prab=self.node_prab,
alias=self.node_alias, 90 | rank_list=rank_list 91 | ) 92 | 93 | return begin_node,end_node 94 | 95 | def generator_data(self): 96 | #edges_index:(begin_node,end_node) 97 | edges_index = [] 98 | for edge in self.all_edges: 99 | edge_index = (self.node2idx[edge[0]], self.node2idx[edge[1]]) 100 | edges_index.append(edge_index) 101 | 102 | #data_index:index of (edge_index) 103 | data_size=self.numEdges 104 | data_index=list(range(data_size)) 105 | random.shuffle(data_index) 106 | 107 | begin_node=[] 108 | start_index=0 109 | end_index=min(start_index+self.batch_size,data_size) 110 | 111 | #constrat negative number 112 | mod=0 113 | #num(generator negative data) 114 | mod_size=1+self.negative_ratio 115 | 116 | while True: 117 | if mod==0: 118 | begin_node,end_node=self.generator_postive_data(data_index,start_index,end_index,edges_index) 119 | sign=np.ones(len(begin_node)) 120 | 121 | else: 122 | begin_node,end_node=self.generator_negative_data(begin_node) 123 | sign=np.ones(len(begin_node))*-1 124 | 125 | if self.order == 'all': 126 | yield ([np.array(begin_node), np.array(end_node)], [sign, sign]) 127 | else: 128 | yield ([np.array(begin_node), np.array(end_node)], [sign]) 129 | 130 | #控制负样本个数 131 | mod+=1 132 | mod%=mod_size 133 | 134 | if mod==0: 135 | start_index = end_index 136 | end_index = min(start_index + self.batch_size, data_size) 137 | 138 | if start_index>=data_size: 139 | mod=0 140 | begin_node=[] 141 | random.shuffle(data_index) 142 | start_index=0 143 | end_index=min(start_index+self.batch_size,data_size) 144 | 145 | def train(self): 146 | model=self.creat_line_model() 147 | model.fit_generator( 148 | self.generator_data(), 149 | steps_per_epoch=((self.numEdges*(1+self.negative_ratio)-1)//self.batch_size+1)*self.times, 150 | verbose=1, 151 | epochs=self.epoch, 152 | callbacks=self.model_prepare(self.log_dir) 153 | ) 154 | 155 | def get_embedding(self): 156 | self.embeddings={} 157 | if self.order=='first': 158 | embeddings=self.embedding_dict['first'].get_weights()[0] 159 | elif self.order=='second': 160 | embeddings=self.embedding_dict['second'].get_weights()[0] 161 | else: 162 | embeddings = np.hstack((self.embedding_dict['first'].get_weights()[ 163 | 0], self.embedding_dict['second'].get_weights()[0])) 164 | idx2node = self.idx2node 165 | for i, embedding in enumerate(embeddings): 166 | self.embeddings[idx2node[i]] = embedding 167 | 168 | return self.embeddings 169 | 170 | def transform(self): 171 | self.train() 172 | self.get_embedding() 173 | return self.embeddings 174 | 175 | 176 | if __name__=='__main__': 177 | from util_tool import read_graph 178 | Graph=read_graph('model/embedding/wiki/Wiki_edgelist.txt') 179 | line=Line( 180 | Graph=Graph, 181 | dimension_size=128, 182 | per_vertex=100, 183 | walk_length=10, 184 | window_size=5, 185 | work=1, 186 | negative_ratio=1, 187 | batch_size=128, 188 | log_dir='model/embedding/setence_model/logs/0/', 189 | epoch=100, 190 | ) 191 | embeddings=line.transform() 192 | from evaluate import evaluate_tools 193 | tool=evaluate_tools(embeddings) 194 | tool.plot_embeddings() -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/best_weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/best_weights.h5 -------------------------------------------------------------------------------- 
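The Line trainer above draws positive edges and negative vertices through the alias tables produced by optimize_fun.generate_alias_table / batch_alias_sample. For reference, here is a compact standalone sketch of the same alias method on a toy distribution; the helper names are illustrative, not the repo's code.

```python
import numpy as np

def build_alias(probs):
    """O(n) alias-table construction: scale probs so they average 1,
    then pair every under-full bucket with an over-full donor."""
    n = len(probs)
    scaled = np.array(probs, dtype=float) * n / np.sum(probs)
    prab, alias = np.ones(n), np.full(n, -1)
    small = [i for i, p in enumerate(scaled) if p < 1.0]
    large = [i for i, p in enumerate(scaled) if p > 1.0]
    while small and large:
        s, l = small.pop(), large.pop()
        prab[s], alias[s] = scaled[s], l      # bucket s keeps scaled[s]; the rest is donated by l
        scaled[l] -= 1.0 - scaled[s]
        if scaled[l] < 1.0:
            small.append(l)
        elif scaled[l] > 1.0:
            large.append(l)
    return prab, alias

def alias_draw(prab, alias, rng):
    """O(1) sampling: pick a bucket uniformly, then keep it or jump to its alias."""
    i = rng.integers(len(prab))
    return i if rng.random() < prab[i] else alias[i]

rng = np.random.default_rng(0)
prab, alias = build_alias([0.5, 0.3, 0.2])
draws = [alias_draw(prab, alias, rng) for _ in range(10000)]
print([round(draws.count(i) / 10000, 2) for i in range(3)])  # roughly [0.5, 0.3, 0.2]
```

Table construction is O(n) and each draw is O(1), which is what keeps the weighted edge sampling and the power-0.75 vertex sampling above cheap inside the batch generator.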
/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565011299.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565011299.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565011324.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565011324.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565011336.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565011336.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565013918.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565013918.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565013943.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565013943.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565013958.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565013958.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565013985.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565013985.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014029.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014029.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014060.dream-System: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014060.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014368.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014368.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014404.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014404.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014481.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014481.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014728.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014728.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014760.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014760.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014805.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565014805.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565015151.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565015151.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565015263.dream-System: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565015263.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565015277.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565015277.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565015308.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565015308.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565057550.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565057550.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565058087.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565058087.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565058252.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565058252.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565058261.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565058261.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565058653.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565058653.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565058673.dream-System: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565058673.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565058702.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565058702.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565059234.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565059234.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565059587.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565059587.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565059681.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565059681.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565059708.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565059708.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565059726.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565059726.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565059768.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565059768.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565059787.dream-System: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565059787.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565060677.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565060677.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565060761.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565060761.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565060853.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565060853.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565069889.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565069889.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565069922.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565069922.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565069970.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565069970.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070262.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070262.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070318.dream-System: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070318.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070526.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070526.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070581.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070581.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070607.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070607.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070688.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070688.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070826.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070826.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070867.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070867.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070932.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070932.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070966.dream-System: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070966.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070986.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565070986.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565071024.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565071024.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565162850.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565162850.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565165341.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565165341.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565168457.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565168457.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565170961.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565170961.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565173560.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565173560.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565173578.dream-System: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565173578.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565173609.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565173609.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565173761.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565173761.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174061.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174061.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174117.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174117.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174191.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174191.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174253.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174253.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174276.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174276.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174293.dream-System: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174293.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174349.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174349.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174378.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565174378.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565179687.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565179687.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565182503.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565182503.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565182554.dream-System: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/events.out.tfevents.1565182554.dream-System -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577277074.hjq-Precision-T7610.19721.672.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577277074.hjq-Precision-T7610.19721.672.v2 -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577277074.hjq-Precision-T7610.profile-empty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577277074.hjq-Precision-T7610.profile-empty -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577277128.hjq-Precision-T7610.20083.106.v2: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577277128.hjq-Precision-T7610.20083.106.v2 -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577277719.hjq-Precision-T7610.20872.106.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577277719.hjq-Precision-T7610.20872.106.v2 -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577277787.hjq-Precision-T7610.21065.672.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577277787.hjq-Precision-T7610.21065.672.v2 -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577278233.hjq-Precision-T7610.21443.672.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577278233.hjq-Precision-T7610.21443.672.v2 -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577278349.hjq-Precision-T7610.21613.672.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577278349.hjq-Precision-T7610.21613.672.v2 -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577278745.hjq-Precision-T7610.22262.672.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577278745.hjq-Precision-T7610.22262.672.v2 -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577279268.hjq-Precision-T7610.22939.672.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577279268.hjq-Precision-T7610.22939.672.v2 -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577279585.hjq-Precision-T7610.2711.672.v2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577279585.hjq-Precision-T7610.2711.672.v2 -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577280012.hjq-Precision-T7610.3191.672.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/events.out.tfevents.1577280012.hjq-Precision-T7610.3191.672.v2 -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_20-31-14/local.trace: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_20-31-14/local.trace -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_20-32-08/local.trace: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_20-32-08/local.trace -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_20-41-59/local.trace: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_20-41-59/local.trace -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_20-43-07/local.trace: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_20-43-07/local.trace -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_20-50-33/local.trace: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_20-50-33/local.trace -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_20-52-29/local.trace: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_20-52-29/local.trace -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_20-59-05/local.trace: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_20-59-05/local.trace -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_21-07-48/local.trace: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_21-07-48/local.trace -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_21-13-05/local.trace: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_21-13-05/local.trace -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_21-20-12/local.trace: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/kon/model/embedding/setence_model/logs/0/train/plugins/profile/2019-12-25_21-20-12/local.trace -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/node2vec.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | from kon.model.embedding.setence_model.walk_core_model import core_model 3 | from kon.model.embedding.util.util_tool import read_graph 4 | from kon.model.embedding.util.evaluate import evaluate_tools 5 | from numpy import random 6 | 7 | class node2vec(core_model): 8 | 9 | def __init__(self,Graph,per_vertex,walk_length,window_size,dimension_size,work,p,q): 10 | super().__init__(Graph,per_vertex,walk_length,window_size,dimension_size,work) 11 | self.p=p 12 | self.q=q 13 | 14 | def Learn_Feature(self): 15 | self.Preprocess_Modified_Weights(self.p,self.q) 16 | sentence_list=[] 17 | for num in range(self.walk_epoch): 18 | random.shuffle(self.all_nodes) 19 | for node in self.all_nodes: 20 | sentence=self.random_walk(node,is_edge_sampling=True) 21 | sentence_list.append(sentence) 22 | 23 | return sentence_list 24 | 25 | def transform(self): 26 | sentence_list=self.Learn_Feature() 27 | embeddings=self.embdding_train(sentence_list) 28 | 29 | return embeddings 30 | 31 | 32 | if __name__=='__main__': 33 | Graph = read_graph('wiki/Wiki_edgelist.txt') 34 | 35 | node_vec= node2vec( 36 | Graph=Graph, 37 | per_vertex=80, 38 | walk_length=10, 39 | window_size=5, 40 | dimension_size=128, 41 | work=1, 42 | p=0.25, 43 | q=4 44 | ) 45 | 46 | embeddings=node_vec.transform() 47 | eval_tool=evaluate_tools(embeddings) 48 | eval_tool.plot_embeddings() 49 | -------------------------------------------------------------------------------- /kon/model/embedding/setence_model/sdne.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | from kon.model.embedding.setence_model.walk_core_model import core_model 3 | from 
kon.model.embedding.util.util_tool import read_graph,get_node_information 4 | import numpy as np 5 | 6 | class sdne(core_model): 7 | 8 | def __init__(self, Graph, per_vertex, walk_length, window_size, dimension_size, work,alpha,beta,epochs,batch_size,verbose,hidden_size_list,l1,l2,log_dir): 9 | super().__init__(Graph, per_vertex, walk_length, window_size, dimension_size, work) 10 | self.alpha=alpha 11 | self.beta=beta 12 | self.batch_size=batch_size 13 | self.epochs=epochs 14 | self.verbose=verbose 15 | self.log_dir=log_dir 16 | self.pred_all_nodes=self.all_nodes 17 | self.idx2node, self.node2idx = get_node_information(self.pred_all_nodes) 18 | self.W,self.W_ = self.generator_adjacency_matrix(self.pred_all_nodes) 19 | self.L=self.generator_L(self.W_) 20 | self.model,self.embedding_model=self.creat_model(hidden_size_list=hidden_size_list,l1=l1,l2=l2) 21 | 22 | def generator_adjacency_matrix(self,all_nodes): 23 | numNodes=len(all_nodes) 24 | W=np.zeros((numNodes,numNodes)) 25 | W_=np.zeros((numNodes,numNodes)) 26 | 27 | for start_vertex in all_nodes: 28 | start_rank=self.node2idx[start_vertex] 29 | for end_vertex in list(self.G.neighbors(start_vertex)): 30 | end_rank=self.node2idx[end_vertex] 31 | weight=self.G[start_vertex][end_vertex].get('weight',1.0) 32 | W[start_rank][end_rank]=weight 33 | W_[start_rank][end_rank]=weight 34 | W_[end_rank][start_rank]=weight 35 | 36 | return W,W_ 37 | 38 | def generator_L(self,W_): 39 | D = np.zeros_like(W_) 40 | 41 | for i in range(len(W_)): 42 | D[i][i] = np.sum(W_[i]) 43 | L = D - W_ 44 | 45 | return L 46 | 47 | def generator_data(self): 48 | all_nodes=self.pred_all_nodes 49 | start_rank=0 50 | end_rank=min(self.batch_size,self.numNodes) 51 | 52 | while True: 53 | batch_nodes=all_nodes[start_rank:end_rank] 54 | node_index_list=[self.node2idx[node] for node in batch_nodes] 55 | 56 | batch_W=self.W[node_index_list,:] 57 | batch_L=self.L[node_index_list][:,node_index_list] 58 | 59 | input_=[batch_W,batch_L] 60 | 61 | yield (input_,input_) 62 | 63 | start_rank = end_rank 64 | end_rank += self.batch_size 65 | end_rank = min(end_rank, self.numNodes) 66 | 67 | if end_rank==self.numNodes: 68 | start_rank=0 69 | end_rank=min(self.batch_size,self.numNodes) 70 | np.random.shuffle(all_nodes) 71 | 72 | def train(self): 73 | self.model.compile('adam',[self.second_nd(self.beta),self.first_nd(self.alpha)]) 74 | self.model.fit_generator( 75 | self.generator_data(), 76 | steps_per_epoch=self.numNodes//self.batch_size, 77 | epochs=self.epochs, 78 | callbacks=self.model_prepare(self.log_dir), 79 | verbose=self.verbose 80 | ) 81 | return self.model 82 | 83 | def get_embeddings(self): 84 | embeddings={} 85 | pred_embeddings=self.embedding_model.predict(self.W,batch_size=self.batch_size) 86 | 87 | rank=0 88 | for embedding in pred_embeddings: 89 | embeddings[self.idx2node[rank]]=embedding 90 | rank+=1 91 | return embeddings 92 | 93 | if __name__=='__main__': 94 | Graph=read_graph() 95 | sden_model=sdne( 96 | Graph=Graph, 97 | dimension_size=128, 98 | per_vertex=100, 99 | walk_length=10, 100 | window_size=5, 101 | work=1, 102 | beta=5, 103 | alpha=1e-6, 104 | verbose=1, 105 | epochs=1000, 106 | batch_size=512, 107 | log_dir='model/embedding/setence_model/logs/0/', 108 | hidden_size_list=[256, 128], 109 | l1=1e-5, 110 | l2=1e-4 111 | ) 112 | 113 | sden_model.train() 114 | embeddings=sden_model.get_embeddings() 115 | 116 | from kon.model import evaluate_tools 117 | eval_tool=evaluate_tools(embeddings) 118 | eval_tool.plot_embeddings() 119 | 120 | 121 | 
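A note on the Laplacian built above: generator_L returns L = D - W_ for the symmetrized adjacency matrix, and the first-order loss that consumes it (first_nd, defined in walk_core_model.py below) computes 2*alpha*tr(Y^T L Y) over a batch of embeddings Y. The trace form works because, for a symmetric W_, tr(Y^T L Y) = 0.5 * sum_ij w_ij * ||y_i - y_j||^2, i.e. it penalizes connected nodes whose embeddings drift apart. The following NumPy check of that identity is illustrative only and is not part of the repository:

import numpy as np

# Check: tr(Y^T L Y) == 0.5 * sum_ij w_ij * ||y_i - y_j||^2 for symmetric W and L = D - W
rng = np.random.default_rng(0)
n, d = 6, 4
W = rng.random((n, n))
W = (W + W.T) / 2            # symmetric weights, like W_ in generator_adjacency_matrix
np.fill_diagonal(W, 0)
D = np.diag(W.sum(axis=1))
L = D - W                    # graph Laplacian, as in generator_L
Y = rng.random((n, d))       # stand-in for the encoder output y

trace_form = np.trace(Y.T @ L @ Y)
pairwise = 0.5 * sum(W[i, j] * np.sum((Y[i] - Y[j]) ** 2) for i in range(n) for j in range(n))
assert np.isclose(trace_form, pairwise)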
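The random-walk samplers in node2vec.py above and walk_core_model.py below call optimize_funcation.generate_alias_table / alias_sample from backone_optimize.py, which is not included in this listing. For reference, here is a minimal sketch of the alias method those calls appear to wrap (Vose's method for O(1) sampling from a discrete distribution). The names and signatures mirror the call sites, but this is an illustrative reimplementation under that assumption, not the repository's code:

import numpy as np

def generate_alias_table(all_probability):
    # Build (prob, alias) tables in O(n); all_probability is assumed to sum to 1.
    n = len(all_probability)
    prob = np.zeros(n)
    alias = np.zeros(n, dtype=np.int64)
    scaled = [p * n for p in all_probability]
    small = [i for i, p in enumerate(scaled) if p < 1.0]
    large = [i for i, p in enumerate(scaled) if p >= 1.0]
    while small and large:
        s, l = small.pop(), large.pop()
        prob[s] = scaled[s]                 # keep s with probability scaled[s]
        alias[s] = l                        # otherwise fall through to l
        scaled[l] -= 1.0 - scaled[s]
        (small if scaled[l] < 1.0 else large).append(l)
    for i in small + large:                 # leftovers keep probability 1
        prob[i] = 1.0
    return prob, alias

def alias_sample(prab, alias):
    # Draw one index in O(1): pick a column uniformly, then keep it or take its alias.
    i = np.random.randint(len(prab))
    return i if np.random.rand() < prab[i] else int(alias[i])

For example, prob, alias = generate_alias_table([0.1, 0.2, 0.3, 0.4]) followed by repeated alias_sample(prob, alias) returns index 3 about 40% of the time.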
-------------------------------------------------------------------------------- /kon/model/embedding/setence_model/walk_core_model.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | import tensorflow as tf 3 | 4 | from keras.layers import Embedding,Input,Lambda,Dense 5 | from keras import backend as K 6 | from keras.optimizers import Adam 7 | from keras.callbacks import ReduceLROnPlateau,TensorBoard,EarlyStopping,ModelCheckpoint 8 | from keras.regularizers import l1_l2 9 | from keras import Model 10 | from numpy import random 11 | from kon.model.embedding.setence_model.backone_language_model import language_model 12 | from kon.model.embedding.setence_model.backone_optimize import optimize_funcation 13 | import numpy as np 14 | 15 | class core_model(object): 16 | 17 | def __init__(self,Graph,per_vertex,walk_length,window_size,dimension_size,work): 18 | self.G=Graph 19 | self.walk_epoch=per_vertex 20 | self.sentence_len=walk_length 21 | self.all_nodes=list(Graph.nodes()) 22 | self.all_edges=list(Graph.edges()) 23 | self.numEdges=Graph.number_of_edges() 24 | self.numNodes=Graph.number_of_nodes() 25 | self.dimension_size=dimension_size 26 | self.backone_model = language_model( 27 | dimension_size=dimension_size, 28 | window_size=window_size, 29 | work=work 30 | ) 31 | self.optimize_fun=optimize_funcation() 32 | 33 | #node2vec dfs/bfs controller 34 | def unnormalized_transition_probability(self,t,v,p,q): 35 | ''' 36 | :param v: current vertex 37 | :param t: previous node 38 | : x: candidate next node 39 | : x=t:d(tx)=0,1/p 40 | : t-x=1:d(tx)=1,1 41 | : else:d(tx)=2,1/q 42 | :return :edge_alias_table 43 | sampling weights:p(d(tx))*edge_weight 44 | ''' 45 | 46 | unnormalized_probs=[] 47 | 48 | for x in self.G.neighbors(v): 49 | weight=self.G[v][x].get('weight',1.0) 50 | if x==t: 51 | unnormalized_probs.append(weight/p) 52 | elif self.G.has_edge(x,t): 53 | unnormalized_probs.append(weight) 54 | else: 55 | unnormalized_probs.append(weight/q) 56 | norm_sum=sum(unnormalized_probs) 57 | all_probs=[float(un_prob)/norm_sum for un_prob in unnormalized_probs] 58 | 59 | edge_sample_table=self.optimize_fun.generate_alias_table(all_probs) 60 | 61 | return edge_sample_table 62 | 63 | def Preprocess_Modified_Weights(self,p,q): 64 | alias_nodes={} 65 | 66 | count=0 67 | for node in self.all_nodes: 68 | unnormalized_probs=[] 69 | for neighbor in self.G.neighbors(node): 70 | weight=self.G[node][neighbor].get('weight',1.0) 71 | unnormalized_probs.append(weight) 72 | 73 | norm_sum=sum(unnormalized_probs) 74 | all_probs=[float(un_probs)/norm_sum for un_probs in unnormalized_probs] 75 | alias_nodes[node]=self.optimize_fun.generate_alias_table(all_probability=all_probs) 76 | 77 | count+=1 78 | 79 | alias_edges={} 80 | 81 | for edge in self.all_edges: 82 | alias_edges[edge]=self.unnormalized_transition_probability(edge[0],edge[1],p,q) 83 | 84 | self.alias_nodes=alias_nodes 85 | self.alias_edges=alias_edges 86 | 87 | 88 | #deepwalk,node2vec core 89 | def random_walk(self,start_vertex,is_edge_sampling=False): 90 | node_sentence=[start_vertex] 91 | now_walk_len=1 92 | 93 | while now_walk_len<self.sentence_len: 94 | now_node=node_sentence[-1] 95 | neighborhood_list=list(self.G.neighbors(now_node)) 96 | if len(neighborhood_list)>0: 97 | if not is_edge_sampling: 98 | next_node=random.choice(neighborhood_list) 99 | node_sentence.append(next_node) 100 | else: 101 | if len(node_sentence)==1: 102 | next_node_rank=self.optimize_fun.alias_sample(prab=self.alias_nodes[now_node][0],alias=self.alias_nodes[now_node][1]) 103 | next_node=neighborhood_list[next_node_rank] 104 | node_sentence.append(next_node) 105 | else: 106 |
pre_node=node_sentence[-2] 107 | edge=(pre_node,now_node) 108 | next_node_rank=self.optimize_fun.alias_sample(self.alias_edges[edge][0],alias=self.alias_edges[edge][1]) 109 | next_node=neighborhood_list[next_node_rank] 110 | node_sentence.append(next_node) 111 | now_walk_len+=1 112 | else: 113 | break 114 | 115 | return node_sentence 116 | 117 | #line_core 118 | def line_loss(self,y_true,y_pred): 119 | #negative samples appear in the second-order objective; since a -1 weight is introduced, the first- and second-order models share this loss 120 | return -K.mean(K.log(K.sigmoid(y_true*y_pred))) 121 | 122 | def creat_line_model(self,order='second',lr=0.001): 123 | v_i = Input(shape=(1,)) 124 | v_j = Input(shape=(1,)) 125 | 126 | first_emb = Embedding(self.numNodes, self.dimension_size, name='first_emb') 127 | second_emb = Embedding(self.numNodes, self.dimension_size, name='second_emb') 128 | context_emb = Embedding(self.numNodes, self.dimension_size, name='context_emb') 129 | 130 | v_i_emb = first_emb(v_i) 131 | v_j_emb = first_emb(v_j) 132 | 133 | v_i_emb_second = second_emb(v_i) 134 | v_j_context_emb = context_emb(v_j) 135 | 136 | first = Lambda(lambda x: tf.reduce_sum( 137 | x[0] * x[1], axis=-1), name='first_order')([v_i_emb, v_j_emb]) 138 | second = Lambda(lambda x: tf.reduce_sum( 139 | x[0] * x[1], axis=-1), name='second_order')([v_i_emb_second, v_j_context_emb]) 140 | 141 | if order == 'first': 142 | output_list = [first] 143 | elif order == 'second': 144 | output_list = [second] 145 | else: 146 | output_list = [first, second] 147 | 148 | model = Model(inputs=[v_i, v_j], outputs=output_list) 149 | 150 | adam=Adam(lr=lr) 151 | model.compile(optimizer=adam,loss=self.line_loss) 152 | 153 | self.embedding_dict = {'first': first_emb, 'second': second_emb} 154 | 155 | return model 156 | 157 | #sdne 158 | def first_nd(self, alpha): 159 | def first_loss(y_true, y_pred): 160 | loss = 2 * alpha * tf.linalg.trace(tf.matmul(tf.matmul(y_pred, y_true, transpose_a=True), y_pred)) 161 | return loss / tf.to_float(K.shape(y_pred)[0]) 162 | 163 | return first_loss 164 | 165 | def second_nd(self, beta): 166 | def second_loss(y_true, y_pred): 167 | b_ = np.ones_like(y_true) 168 | b_[y_true != 0] = beta 169 | loss = K.sum(K.square((y_true - y_pred) * b_), axis=-1) 170 | return K.mean(loss) 171 | 172 | return second_loss 173 | 174 | def encoder(self, x, hidden_size_list, l1, l2): 175 | for i in range(len(hidden_size_list) - 1): 176 | x = Dense(units=hidden_size_list[i], activation='relu', kernel_regularizer=l1_l2(l1, l2))(x) 177 | y = Dense(units=hidden_size_list[-1], activation='relu', kernel_regularizer=l1_l2(l1, l2), name='encode')(x) 178 | 179 | return y 180 | 181 | def decoder(self, y, hidden_size_list, l1, l2): 182 | for i in reversed(range(len(hidden_size_list) - 1)): 183 | y = Dense(units=hidden_size_list[i], activation='relu', kernel_regularizer=l1_l2(l1, l2))(y) 184 | x = Dense(units=self.numNodes, activation='relu', name='decode')(y) 185 | 186 | return x 187 | 188 | def creat_model(self, hidden_size_list, l1, l2): 189 | adjacency_matrix = Input(shape=(self.numNodes,)) 190 | L = Input(shape=(None,)) 191 | x = adjacency_matrix 192 | 193 | y = self.encoder(x, hidden_size_list, l1, l2) 194 | x_ = self.decoder(y, hidden_size_list, l1, l2) 195 | 196 | model = Model(inputs=[adjacency_matrix, L], outputs=[x_, y]) 197 | emb = Model(inputs=adjacency_matrix, outputs=y) 198 | 199 | return model,emb 200 | 201 | 202 | #callback 203 | def model_prepare(self,log_dir): 204 | tensorboard=TensorBoard(log_dir=log_dir) 205 | 206 | checkpoint=ModelCheckpoint( 207 | log_dir+'best_weights.h5', 208 | monitor='loss', 209 |
save_best_only=True, 210 | save_weights_only=True, 211 | verbose=1, 212 | period=1 213 | ) 214 | 215 | earlystop=EarlyStopping( 216 | monitor='loss', 217 | patience=50 218 | ) 219 | 220 | reduce_lr=ReduceLROnPlateau( 221 | monitor='loss', 222 | patience=1, 223 | factor=0.1 224 | ) 225 | 226 | callback_list=[tensorboard,checkpoint,earlystop,reduce_lr] 227 | return callback_list 228 | 229 | #language model (negative-sampling skip-gram model) 230 | def embdding_train(self,sentence_list): 231 | 232 | print('begin train embedding') 233 | print('loading...') 234 | 235 | model=self.backone_model.word2vec_on_train(sentence_list) 236 | 237 | print('train ending') 238 | 239 | embeddings={} 240 | for node in self.all_nodes: 241 | embeddings[node]=model.wv[node] 242 | 243 | return embeddings 244 | -------------------------------------------------------------------------------- /kon/model/embedding/util/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # _*_ coding:utf-8 _*_ 3 | '''================================= 4 | @Author :tix_hjq 5 | @Date :2020/7/21 9:00 AM 6 | @File :__init__.py.py 7 | @email :hjq1922451756@gmail.com or 1922451756@qq.com 8 | =================================''' -------------------------------------------------------------------------------- /kon/model/embedding/util/evaluate.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | import numpy as np 3 | from sklearn.manifold import TSNE 4 | from kon.model.embedding.util.util_tool import read_node_label 5 | 6 | import matplotlib.pyplot as plt 7 | 8 | class evaluate_tools(): 9 | def __init__(self,embeddings,label_path='../wiki/Wiki_labels.txt'): 10 | self.data=embeddings 11 | self.X,self.y=read_node_label(label_path) 12 | 13 | # =read_label(label_path) 14 | 15 | def plot_embeddings(self): 16 | emb_list = [] 17 | for k in self.X: 18 | emb_list.append(self.data[k]) 19 | emb_list = np.array(emb_list) 20 | print(emb_list) 21 | 22 | model = TSNE(n_components=2) 23 | node_pos = model.fit_transform(emb_list) 24 | 25 | 26 | color_idx = {} 27 | for i in range(len(self.X)): 28 | color_idx.setdefault(self.y[i][0], []) 29 | color_idx[self.y[i][0]].append(i) 30 | 31 | for c, idx in color_idx.items(): 32 | plt.scatter(node_pos[idx, 0], node_pos[idx, 1], label=c) 33 | plt.legend() 34 | plt.show() 35 | -------------------------------------------------------------------------------- /kon/model/embedding/util/test.txt: -------------------------------------------------------------------------------- 1 | 1 2 2 | 4 3 3 | 5 6 4 | 2 3 5 | 2 1 6 | 3 5 7 | 1 2 8 | -------------------------------------------------------------------------------- /kon/model/embedding/util/util_tool.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | import os 3 | import networkx as nx 4 | import pandas as pd 5 | 6 | 7 | def get_node_information(all_nodes): 8 | node2idx = {} 9 | idx2node = [] 10 | node_size = 0 11 | for node in all_nodes: 12 | node2idx[node] = node_size 13 | idx2node.append(node) 14 | node_size += 1 15 | idx2node = idx2node 16 | node2idx = node2idx 17 | return idx2node, node2idx 18 | 19 | def save_edgelist(edgelist_list,save_path): 20 | if os.path.exists(save_path): 21 | os.remove(save_path) 22 | 23 | file=open(save_path,mode='a+') 24 | for edgelist in edgelist_list: 25 | file.writelines(edgelist) 26 | 27 | def read_graph(edgelist_path='../wiki/Wiki_edgelist.txt'): 28 |
DG=nx.read_edgelist( 29 | edgelist_path, 30 | create_using=nx.DiGraph(), 31 | nodetype=None, 32 | data=[('weight',int)] 33 | ) 34 | 35 | return DG 36 | 37 | def read_node_label(filename, skip_head=False): 38 | fin = open(filename, 'r') 39 | X = [] 40 | Y = [] 41 | while 1: 42 | if skip_head: 43 | fin.readline() 44 | l = fin.readline() 45 | if l == '': 46 | break 47 | vec = l.strip().split(' ') 48 | X.append(vec[0]) 49 | Y.append(vec[1:]) 50 | fin.close() 51 | return X, Y 52 | 53 | def read_label(label_path): 54 | data=pd.read_csv(label_path,header=None,sep=' ') 55 | nodes=data[0].tolist() 56 | label=data[1].tolist() 57 | 58 | return nodes,label 59 | 60 | if __name__=='__main__': 61 | pass 62 | # edgelist_list=['1 2\n','4 3\n','5 6\n','2 3\n','2 1\n','3 5\n','1 2\n'] 63 | # save_path='kon.txt' 64 | # save_edgelist(edgelist_list,save_path) 65 | # read_graph(save_path) 66 | 67 | -------------------------------------------------------------------------------- /kon/model/feature_eng/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # _*_ coding:utf-8 _*_ 3 | '''================================= 4 | @Author :tix_hjq 5 | @Date :2020/6/24 上午12:15 6 | @File :__init__.py.py 7 | @email :hjq1922451756@gmail.com or 1922451756@qq.com 8 | =================================''' -------------------------------------------------------------------------------- /kon/model/feature_eng/base_model.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | '''================================= 3 | @Author :tix_hjq 4 | @Date :19-10-30 下午9:36 5 | =================================''' 6 | from sklearn.model_selection import KFold, StratifiedKFold 7 | from sklearn.metrics import mean_squared_error as mse 8 | from sklearn.metrics import f1_score, r2_score 9 | from numpy.random import random, shuffle 10 | import matplotlib.pyplot as plt 11 | from pandas import DataFrame 12 | from tqdm import tqdm 13 | import lightgbm as lgb 14 | import pandas as pd 15 | import numpy as np 16 | import warnings 17 | import os 18 | import gc 19 | import re 20 | import datetime 21 | import sys 22 | 23 | warnings.filterwarnings("ignore") 24 | 25 | pd.set_option('display.max_columns', None) 26 | pd.set_option('display.max_rows', None) 27 | pd.set_option('max_colwidth', 100) 28 | 29 | print(os.getcwd()) 30 | #---------------------------------------------------- 31 | class base_model(): 32 | def __init__(self,save_folder,random_state=2048): 33 | print('base model is backend') 34 | self.random_state=random_state 35 | self.save_folder=save_folder 36 | 37 | def model_fit(self,X_train,y_train,cate_fea,X_vail,y_vail,is_pred=True,test_data=None,loss=['cross_entropy','binary'],is_classiffy=True,threshold=0.103): 38 | if is_classiffy: 39 | loss=loss[0] 40 | else: 41 | loss=loss[1] 42 | 43 | lgb_model = lgb.LGBMRegressor( 44 | num_leaves=40, reg_alpha=1, reg_lambda=0.1, objective=loss, 45 | max_depth=-1, learning_rate=0.05, min_child_samples=5, random_state=self.random_state, 46 | n_estimators=8000, subsample=0.8, colsample_bytree=0.8,is_unbalance=True, 47 | device='gpu' 48 | # n_jobs=-1 49 | ) 50 | 51 | lgb_model.fit(X_train,y_train,eval_set=[(X_vail,y_vail)],eval_metric='auc', 52 | categorical_feature=cate_fea, 53 | early_stopping_rounds=300,verbose=10) 54 | 55 | result_weight=lgb_model.best_score_['valid_0']['auc'] 56 | # result_weight=lgb_model.best_score_['training']['binary_logloss'] 57 | 58 | model_import = DataFrame() 
59 | model_import['feature'] = X_train.columns.tolist() 60 | model_import['feature_importance'] = lgb_model.feature_importances_ 61 | model_import['model_weight'] = result_weight 62 | model_import.sort_values(by=['feature_importance'], ascending=False, inplace=True) 63 | zero_fea_list = model_import[model_import['feature_importance'] != 0]['feature'].tolist() 64 | 65 | print(model_import.head()) 66 | print('-------------------------------') 67 | 68 | if is_classiffy: 69 | vail_y_pred = lgb_model.predict(X_vail, num_iteration=lgb_model.best_iteration_) 70 | vail_result = DataFrame(data=vail_y_pred, columns=['vail_pred']) 71 | vail_result['y_vail'] = y_vail 72 | vail_result.sort_values(['vail_pred'], ascending=False, inplace=True) 73 | vail_result.reset_index(inplace=True) 74 | 75 | del vail_result['index'] 76 | gc.collect() 77 | 78 | vail_result.loc[vail_result.index <= int(vail_result.shape[0] * threshold), 'vail_pred'] = 1 79 | vail_result.loc[vail_result.vail_pred != 1, 'vail_pred'] = 0 80 | print(vail_result.head()) 81 | try: 82 | print(f1_score(y_pred=vail_result['vail_pred'].tolist(),y_true=vail_result['y_vail'].tolist())) 83 | except ValueError: 84 | print('ERROR') 85 | del vail_result 86 | 87 | if is_pred==True: 88 | result_data = np.array(lgb_model.predict(test_data, num_iteration=lgb_model.best_iteration_ + 10)) 89 | result_=DataFrame(columns=['result'],data=result_data) 90 | result_['weight']=result_weight 91 | return result_,zero_fea_list,model_import 92 | return zero_fea_list,model_import 93 | 94 | 95 | 96 | def avg_model_pred(self,result_data,n_split,test_data,is_plot=True,is_avg=True): 97 | print(result_data.head()) 98 | 99 | # cal weight_avg_result 100 | result_cols = [] 101 | weight_cols = [] 102 | for i in range(0, n_split): 103 | result_cols.append('result_' + str(i)) 104 | weight_cols.append('weight_' + str(i)) 105 | 106 | result_data['result'] = 0 107 | 108 | for w_col, r_col in zip(weight_cols, result_cols): 109 | if not is_avg: 110 | result_data[w_col] /= result_data['weight'] 111 | else: 112 | result_data[w_col]=1/n_split 113 | print(result_data[w_col].head()) 114 | result_data[r_col] *= result_data[w_col] 115 | 116 | for col in result_cols: 117 | result_data['result'] += result_data[col] 118 | 119 | score = result_data['weight'].unique().tolist()[0] / n_split 120 | 121 | submit_data = DataFrame() 122 | submit_data['ID'] = test_data.ID.tolist() 123 | submit_result = [] 124 | 125 | for r in result_data.result: 126 | if r <= 0: 127 | submit_result.append(0.1) 128 | else: 129 | submit_result.append(r) 130 | submit_data['Label'] = submit_result 131 | 132 | del result_data 133 | gc.collect() 134 | 135 | print('model_score:{}'.format(score)) 136 | 137 | if is_plot: 138 | data = DataFrame(submit_data.Label.value_counts()).reset_index() 139 | plt.bar(data['index'], data['Label']) 140 | 141 | return submit_data,score 142 | 143 | 144 | def n_fold_fit(self,train_data,cols,cate_col,test_data=None,label_col='Label',is_pred=True): 145 | #train by k_fold 146 | result_data=DataFrame() 147 | if is_pred: 148 | result_data['weight']=[0]*test_data.shape[0] 149 | fea_filter =[] 150 | n_split=10 151 | rank=0 152 | 153 | k=StratifiedKFold(n_splits=n_split,random_state=self.random_state,shuffle=True) 154 | 155 | all_feature_important=DataFrame() 156 | all_feature_important['feature']=cols 157 | for train_idx,test_idx in tqdm(k.split(train_data[cols],train_data[label_col]),desc='k_split_fitting'): 158 | X_train=train_data[cols].loc[train_idx] 159 | X_vail=train_data[cols].loc[test_idx] 160 | 
161 | y_train=train_data[[label_col]].loc[train_idx] 162 | y_vail=train_data[[label_col]].loc[test_idx] 163 | 164 | if is_pred: 165 | result_,zero_fea,feature_important=self.model_fit(X_train=X_train,y_train=y_train,X_vail=X_vail,y_vail=y_vail,test_data=test_data[cols],cate_fea=cate_col,is_pred=is_pred) 166 | result_data['result_'+str(rank)]=result_['result'] 167 | result_data['weight_'+str(rank)]=result_['weight'] 168 | result_data['weight']+=result_['weight'] 169 | del result_ 170 | gc.collect() 171 | 172 | if not is_pred: 173 | zero_fea,feature_important=self.model_fit(X_train=X_train,y_train=y_train,X_vail=X_vail,y_vail=y_vail,cate_fea=cate_col,is_pred=is_pred) 174 | 175 | feature_important.columns=['feature']+[str(col)+'_'+str(rank) for col in feature_important.columns.tolist()[1:]] 176 | all_feature_important=all_feature_important.merge(feature_important,'left',on=['feature']) 177 | fea_filter.append(zero_fea) 178 | rank+=1 179 | 180 | np.save(self.save_folder+'zero_feature',fea_filter) 181 | 182 | return result_data,n_split,all_feature_important,fea_filter 183 | 184 | def save_feature_submit(self,submit_data,score,cols,cate_fea): 185 | cate_fea_label = [] 186 | for col in cols: 187 | if col in cate_fea: 188 | cate_fea_label.append(1) 189 | else: 190 | cate_fea_label.append(0) 191 | 192 | model_features = DataFrame() 193 | model_features['cols'] = cols 194 | model_features['is_cate'] = cate_fea_label 195 | 196 | feature_path = self.save_folder+'use_feature/' + str(datetime.datetime.now().date()) + '/' 197 | result_path = self.save_folder+ 'result/' + str(datetime.datetime.now().date()) + '/' 198 | 199 | for path in [feature_path, result_path]: 200 | if not os.path.exists(path): 201 | os.mkdir(path) 202 | 203 | model_features.to_csv( 204 | feature_path + 'model_feature_' + str(datetime.datetime.now()) + '_' + str(score) + '.csv', index=None) 205 | 206 | submit_data.drop(columns=['weight'],inplace=True) 207 | submit_data.to_csv(result_path + 'submit_' + str(datetime.datetime.now()) + '_' + str(score) + '.csv', 208 | index=None) 209 | 210 | def fit_transform(self,train_data:DataFrame,use_cols,cate_cols,label_col:str,test_data=None,is_pred=True): 211 | if is_pred: 212 | result_data,n_split,feature_important,zero_fea=self.n_fold_fit(train_data=train_data,test_data=test_data,label_col=label_col,cols=use_cols,cate_col=cate_cols,is_pred=is_pred) 213 | submit_data,score=self.avg_model_pred(result_data=result_data,n_split=n_split,test_data=test_data) 214 | self.save_feature_submit(score=score,submit_data=submit_data,cate_fea=cate_cols,cols=use_cols) 215 | else: 216 | result_data,n_split,feature_important,zero_fea=self.n_fold_fit(train_data=train_data,label_col=label_col,is_pred=is_pred,cols=use_cols,cate_col=cate_cols) 217 | return feature_important,zero_fea 218 | 219 | def single_fit_transform(self,X_train,y_train,X_vail,y_vail,cate_cols,test_data,pred_id,is_classiffy=True,threshold=0.103): 220 | result_, zero_fea_list, model_import=self.model_fit(X_train,y_train,cate_cols,X_vail,y_vail,test_data=test_data,is_classiffy=is_classiffy,threshold=threshold) 221 | result_['id']=pred_id 222 | score=result_['weight'].unique().tolist()[0] 223 | cols=X_train.columns.tolist() 224 | self.save_feature_submit(score=score, submit_data=result_, cate_fea=cate_cols, cols=cols) 225 | 226 | if __name__=='__main__': 227 | data_folder ='../../data/' 228 | submit_data_folder = data_folder + 'submit_data/' 229 | 230 | from sklearn.datasets import load_iris 231 | iris = load_iris() 232 | train_data=iris.data 233 | 
target_data=iris.target 234 | train_fea=iris.feature_names 235 | train_data=DataFrame(data=train_data,columns=train_fea) 236 | target_data=DataFrame(data=target_data,columns=['target']) 237 | from sklearn.model_selection import train_test_split 238 | X_train,X_test,y_train,y_test=train_test_split(train_data,target_data,test_size=0.3,random_state=2048) 239 | base_model(submit_data_folder).single_fit_transform(X_train,y_train,X_test,y_test,cate_cols=[],test_data=train_data,pred_id=train_data.index.tolist(),is_classiffy=False,threshold=0.103) -------------------------------------------------------------------------------- /kon/utils/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # _*_ coding:utf-8 _*_ 3 | '''================================= 4 | @Author :tix_hjq 5 | @Date :2020/5/29 下午4:03 6 | @File :__init__.py.py 7 | =================================''' 8 | 9 | -------------------------------------------------------------------------------- /kon/utils/data_prepare.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | '''================================= 3 | @Author :tix_hjq 4 | @Date :2020/5/2 下午5:14 5 | @File :data_prepare.py 6 | =================================''' 7 | from sklearn.preprocessing import LabelEncoder 8 | from numpy.random import random, shuffle 9 | from pandas import DataFrame 10 | import tensorflow as tf 11 | from sklearn.preprocessing import MinMaxScaler 12 | import pandas as pd 13 | import numpy as np 14 | import warnings 15 | import os 16 | from collections import namedtuple 17 | 18 | from kon.model.ctr_model.layer.interactive_layer.interactive_layer import SparseEmbed 19 | from kon.model.feature_eng.feature_transform import feature_tool 20 | from kon.model.feature_eng.base_model import base_model 21 | import multiprocessing as mp 22 | 23 | warnings.filterwarnings("ignore") 24 | pd.set_option('display.max_columns', None) 25 | pd.set_option('display.max_rows', None) 26 | pd.set_option('max_colwidth', 100) 27 | 28 | print(os.getcwd()) 29 | #---------------------------------------------------- 30 | data_folder = '../../data/' 31 | origin_data_folder = data_folder + 'origin_data/' 32 | submit_data_folder = data_folder + 'submit_data/' 33 | eda_data_folder = data_folder + 'eda_data/' 34 | fea_data_folder = data_folder + 'fea_data/' 35 | #----------------------------------------------------------------- 36 | model_tool = base_model(submit_data_folder) 37 | fea_tool = feature_tool(fea_data_folder) 38 | #----------------------------------------------------------------- 39 | class InputFeature(object): 40 | def __init__(self,denseInfo:list=None,sparseInfo:list=None,seqInfo:list=None,denseInputs:list=None,sparseInputs:list=None,seqInputs:list=None,linearEmbed:list=None,sparseEmbed:list=None,seqEmbedList:list=None): 41 | self.dense_info=denseInfo 42 | self.sparse_info=sparseInfo 43 | self.seq_info=seqInfo 44 | self.dense_inputs=denseInputs 45 | self.sparse_inputs=sparseInputs 46 | self.seq_inputs=seqInputs 47 | self.linear_embed=linearEmbed 48 | self.sparse_embed=sparseEmbed 49 | self.seq_embed_list=seqEmbedList 50 | 51 | def toList(self): 52 | from pprint import pprint 53 | pprint([self.dense_info,self.sparse_info,self.seq_info,self.dense_inputs,self.sparse_inputs, 54 | self.seq_inputs,self.linear_embed,self.sparse_embed,self.seq_embed_list]) 55 | 56 | class data_prepare(object): 57 | def 
__init__(self,batch_size=None,use_shuffle=True,cpu_core=None): 58 | print('data prepare is backend') 59 | self.sparseFea=namedtuple('sparseFea',['fea_name','word_size','input_dim','cross_unit','linear_unit','pre_weight','mask_zero','is_trainable','input_length','sample_num','batch_size','emb_reg']) 60 | self.denseFea=namedtuple('denseFea',['fea_name','batch_size']) 61 | self.batch_size=batch_size 62 | self.use_shuffle=use_shuffle 63 | self.cpu_core=mp.cpu_count() if cpu_core==None else cpu_core 64 | 65 | def FeatureInput(self,sparseInfo:list=None, denseInfo:list=None, seqInfo=None,useLinear:bool=False,useAddLinear:bool=False,useFlattenLinear:bool=False,useFlattenSparse:bool=False): 66 | 67 | linearEmbed,sparseEmbed,seqEmbed, seqMask=None,None,None,None 68 | [denseInputs, sparseInputs, seqInputs] =self.df_prepare(sparseInfo=sparseInfo, denseInfo=denseInfo,seqInfo=seqInfo) 69 | if useLinear: 70 | linearEmbed = SparseEmbed(sparseInfo, use_flatten=useFlattenLinear, is_linear=True,use_add=useAddLinear)(sparseInputs) 71 | if sparseInputs: 72 | sparseEmbed = SparseEmbed(sparseInfo, use_flatten=useFlattenSparse)(sparseInputs) 73 | if seqInputs: 74 | seqEmbed,seqMask= SparseEmbed(seqInfo,support_masking=True,mask_zero=True,is_linear=False,use_flatten=False)(seqInputs) 75 | 76 | return InputFeature(denseInfo,sparseInfo,seqInfo,denseInputs,sparseInputs,seqInputs,linearEmbed,sparseEmbed,[seqEmbed,seqMask]) 77 | 78 | def concat_test_train(self, train_df: DataFrame, test_df: DataFrame): 79 | train_idx = train_df.index.tolist() 80 | test_idx = list(np.array(test_df.index) + train_idx[-1] + 1) 81 | df = pd.concat([train_df, test_df], ignore_index=True) 82 | 83 | return df, (train_idx, test_idx) 84 | 85 | def sparse_fea_deal(self,sparseDf:DataFrame,embed_dim=8,linear_dim=1,pre_weight=None,emb_reg=None): 86 | if not pre_weight: 87 | pre_weight=[None]*sparseDf.shape[1] 88 | if not emb_reg: 89 | emb_reg=[1e-8]*sparseDf.shape[1] 90 | 91 | sparseDf = sparseDf.fillna('-1') 92 | for fea in sparseDf: 93 | sparseDf[fea]=LabelEncoder().fit_transform(sparseDf[fea].astype('str')) 94 | 95 | sparseInfo=[self.sparseFea( 96 | fea_name=fea, input_dim=sparseDf[fea].shape[0], 97 | cross_unit=embed_dim, linear_unit=linear_dim,word_size=sparseDf[fea].nunique(), 98 | pre_weight=weight_,input_length=1,is_trainable=True,mask_zero=False,sample_num=None, 99 | batch_size=self.batch_size,emb_reg=reg 100 | ) for fea,weight_,reg in zip(sparseDf,pre_weight,emb_reg)] 101 | 102 | return sparseDf,sparseInfo 103 | 104 | def single_seq_deal(self,seq_list, is_str_list=True,is_str=False,max_len=None,sample_num=None): 105 | ''' 106 | :param is_str_list: 107 | format:"[[1,2,3],[1,2,3]]"==>True 108 | else: 109 | format:[[1,2,3],[1,2,3]]==>False 110 | :param is_str: 111 | format: ['1,2','3'] 112 | ''' 113 | sample_seq={} 114 | 115 | if is_str_list: 116 | seq_list = fea_tool.batch_convert_list(seq_list) 117 | if is_str: 118 | seq_list = [str(i).split(',') for i in seq_list] 119 | 120 | w2str = [[str(j) for j in i] for i in seq_list] 121 | seq = [" ".join(i) for i in w2str] 122 | 123 | token = tf.keras.preprocessing.text.Tokenizer(lower=False, char_level=False, split=' ') 124 | token.fit_on_texts(seq) 125 | format_seq = token.texts_to_sequences(seq) 126 | format_seq = tf.keras.preprocessing.sequence.pad_sequences(format_seq, maxlen=max_len,value=0) 127 | seq_idx = token.word_index 128 | 129 | # if sample_num: 130 | # sample_seq=[[[label]+list(np.random.choice([i for i in seq if i!=label and i!=0],size=sample_num)) if label!=0 else [] 131 | # for 
label in seq]for seq in format_seq] 132 | 133 | return (format_seq, seq_idx,sample_seq) 134 | 135 | 136 | def hard_search(self,seqData:list,seqCate:list,targetCate:list)->(list,list): 137 | ''' 138 | SIM hard search: keep only the behaviors whose category matches the target item's category 139 | :return: (reduced seq, reduced seq cate) 140 | ''' 141 | aimIdx=[[idx_ for idx_,cate_ in enumerate(cateList) if cate_==aimCate] 142 | for cateList, aimCate in zip(seqCate, targetCate)] 143 | aimList=np.array([[np.array(seq)[idx_],np.array(cate)[idx_]] 144 | if idx_!=[] else [[],[]] for seq,cate,idx_ in zip(seqData,seqCate,aimIdx)]) 145 | seqData,seqCate=np.split(aimList,[1],axis=1) 146 | 147 | return seqData,seqCate 148 | 149 | 150 | def seq_deal(self,seqDf,embedding_dim:list,max_len:list=None,is_str_list=True,is_str=False,mask_zero=True,is_trainable=True,pre_weight:list=None,sample_num=None,use_wrap=True,emb_reg=None): 151 | ''' 152 | notice: 153 | <1> seqDf: 154 | expected format ==> see single_seq_deal 155 | <2> pre_weight: 156 | format ==> [[fea1_weight],[fea2_weight],...] 157 | <3> sample_num: 158 | notice: 159 | negative sampling requires len(seq)>=sample_num+1 160 | e.g. 161 | sample_num=5 ==> draw 5 negative samples per sequence position 162 | 163 | :param max_len: max sequence length per feature 164 | :param embedding_dim: embedding dim of each sequence feature 165 | :param is_str_list & is_str: see single_seq_deal 166 | :param mask_zero: True ==> mask padded zeros 167 | :param is_trainable: True ==> embedding weights are trainable 168 | :param pre_weight: pre-trained embedding weights (e.g. word2vec as backend) 169 | :param use_wrap: True ==> wrap each feature as a sparseFea namedtuple 170 | :return: seqDf,seqIdx,seqInfo 171 | ''' 172 | 173 | if not pre_weight: 174 | pre_weight=[None]*seqDf.shape[1] 175 | if not max_len: 176 | max_len=[None]*seqDf.shape[1] 177 | if not emb_reg: 178 | emb_reg=[1e-8]*seqDf.shape[1] 179 | 180 | seq_tuple={ 181 | seq_fea:self.single_seq_deal(seqDf[seq_fea],is_str_list=is_str_list,is_str=is_str,max_len=len_,sample_num=sample_num) 182 | for seq_fea,len_ in zip(seqDf,max_len)} 183 | seqDf={key:seq_tuple[key][0] for key in seq_tuple} 184 | seqIdx = {key: seq_tuple[key][1] for key in seq_tuple} 185 | 186 | sample_seq = None 187 | if sample_num: 188 | sample_seq={key:[i[1:] for i in seq_tuple[key][2]] for key in seq_tuple} 189 | del seq_tuple 190 | 191 | seqInfo=None 192 | if use_wrap: 193 | seqDf,seqInfo=self.sparse_wrap(seqDf,seqIdx=seqIdx,embedding_dim=embedding_dim,max_len=max_len,mask_zero=mask_zero,is_trainable=is_trainable,pre_weight=pre_weight,sample_num=sample_num,emb_reg=emb_reg) 194 | 195 | return seqDf,seqIdx,seqInfo 196 | 197 | def sparse_wrap(self,seqDf,embedding_dim:list,seqIdx=None,seqIdx_path=None,max_len:list=None,mask_zero=True,is_trainable=True,pre_weight:list=None,sample_num=None,emb_reg=None): 198 | 199 | if not pre_weight: 200 | pre_weight=[None]*seqDf.shape[1] 201 | if not max_len: 202 | max_len=[None]*seqDf.shape[1] 203 | if seqIdx_path: 204 | seqIdx = fea_tool.pickle_op(seqIdx_path, is_save=False) 205 | if emb_reg==None: 206 | emb_reg=[1e-8]*seqDf.shape[1] 207 | 208 | seqInfo = [self.sparseFea( 209 | fea_name=seq_fea, word_size=len(seqIdx[seq_key].keys()) + 1, input_dim=seqDf[seq_fea].shape[0], 210 | cross_unit=embed_, linear_unit=1, pre_weight=weight_, mask_zero=mask_zero, 211 | is_trainable=is_trainable, input_length=max_, sample_num=sample_num,batch_size=self.batch_size,emb_reg=reg 212 | ) for seq_fea, seq_key, weight_, max_, embed_,reg in zip(seqDf, seqIdx, pre_weight, max_len, embedding_dim,emb_reg)] 213 | 214 | if not isinstance(seqDf,dict): 215 | seqDf={fea:np.array([[int(j) for j in i.split(',')]for i in 
seqDf[fea].values]) for fea in seqDf} 216 | 217 | return seqDf,seqInfo 218 | 219 | def generator_session(self,df, group_cols: list, item_cols: str, 220 | session_maxLen, use_check=False): 221 | ''' 222 | :param df: 223 | format: 224 | user_id time item 225 | 1 1 1 226 | :param group_cols: 227 | format: list ==> [user_id, time] 228 | [group key column, time column used to split sessions] 229 | :param item_cols: 230 | item column name 231 | :param use_check: 232 | if True, print the session-size distribution (helps to choose session_maxLen) 233 | :return: 234 | DataFrame ==> columns = user_id, '{item_cols}_session' 235 | ''' 236 | 237 | if use_check: 238 | def need_(x): 239 | return len(x.tolist()) 240 | 241 | print(df.groupby(group_cols)[item_cols].agg(need_).reset_index()[item_cols].value_counts().head(20)) 242 | else: 243 | def session_seq(x): 244 | return ','.join(x.tolist()) 245 | 246 | df = df.groupby(group_cols)[item_cols].agg(session_seq).reset_index().rename( 247 | columns={item_cols: '{}_session'.format(item_cols)}) 248 | df['{}_session'.format(item_cols)] = [','.join([str(j) for j in i]) for i in 249 | tf.keras.preprocessing.sequence.pad_sequences( 250 | [i.split(',') for i in df['{}_session'.format(item_cols)]], 251 | maxlen=session_maxLen)] 252 | del df[group_cols[1]] 253 | 254 | return df 255 | 256 | def generator_seq(self,df, group_cols, item_cols, session_maxLen, session_maxNum, use_check=False): 257 | ''' 258 | :param df: output of generator_session ==> df[user_id, '{item_cols}_session'] 259 | :param group_cols: same as generator_session 260 | :param item_cols: same as generator_session 261 | :param session_maxLen: same as generator_session 262 | :param session_maxNum: max number of sessions kept per user 263 | :param use_check: same as generator_session 264 | :return: DataFrame ==> columns = user_id, '{item_cols}_session' (session_maxNum sessions, each padded to session_maxLen) 265 | ''' 266 | if use_check: 267 | def need_(x): 268 | return len(x.tolist()) 269 | 270 | print(df.groupby([group_cols[0]])['{}_session'.format(item_cols)].agg( 271 | need_).reset_index()['{}_session'.format(item_cols)].value_counts().head(20)) 272 | else: 273 | def seq(x): 274 | use_list = x.tolist() 275 | if len(use_list) > session_maxNum: 276 | use_list = use_list[:session_maxNum] 277 | else: 278 | use_list += [','.join([str(i) for i in [0] * session_maxLen]) for i in 279 | range(session_maxNum - len(use_list))] 280 | 281 | need_list = "" 282 | for i in use_list: 283 | need_list += i + ',' 284 | return need_list[:-1] 285 | 286 | df = df.groupby([group_cols[0]])['{}_session'.format(item_cols)].agg(seq).reset_index() 287 | 288 | return df 289 | 290 | def sparse_prepare(self, sparse_info: list): 291 | return [tf.keras.Input(batch_shape=(info_.batch_size,info_.input_length,), 292 | name=info_.fea_name) for info_ in sparse_info] 293 | 294 | def dense_fea_deal(self,denseDf:DataFrame,is_fillna=True): 295 | if is_fillna: 296 | denseDf = DataFrame({fea: denseDf[fea].fillna(denseDf[fea].mode()[0]) 297 | for fea in denseDf}) 298 | denseDf[denseDf.columns.tolist()]=MinMaxScaler(feature_range=(0,1)).fit_transform(denseDf) 299 | denseInfo=[self.denseFea(fea,self.batch_size) for fea in denseDf] 300 | 301 | return denseDf,denseInfo 302 | 303 | def dense_prepare(self,dense_info:list): 304 | return [tf.keras.Input(batch_shape=(info_.batch_size,1,), name=info_.fea_name)for info_ in dense_info] 305 | 306 | def df_format(self,df:DataFrame): 307 | df_={} 308 | for fea in df: 309 | df_.update({fea:df[fea].values}) 310 | return df_ 311 | 312 | def df_format_input(self,df:list): 313 | df=pd.concat(df,axis=1) 314 | df=self.df_format(df) 315 | return df 316 | 
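# ------------------------------------------------------------------
# Minimal usage sketch (illustrative only, not executed here): how the helpers in
# this class are meant to chain together. The DataFrames train/test and the
# column names 'age', 'item_id', 'hist_item_seq', 'click' are hypothetical
# placeholders, not part of the library.
#
#   prepare = data_prepare(batch_size=256)
#   df, (train_idx, test_idx) = prepare.concat_test_train(train, test)
#
#   denseDf, denseInfo = prepare.dense_fea_deal(df[['age']])
#   sparseDf, sparseInfo = prepare.sparse_fea_deal(df[['item_id']], embed_dim=8)
#   seqDf, seqIdx, seqInfo = prepare.seq_deal(df[['hist_item_seq']], embedding_dim=[8],
#                                             max_len=[20], is_str_list=False, is_str=True)
#
#   inputs = prepare.FeatureInput(sparseInfo=sparseInfo, denseInfo=denseInfo, seqInfo=seqInfo)
#   train_set, vail_set = prepare.extract_train_test(train_idx, test_idx, targetDf=df[['click']],
#                                                    sparseDf=sparseDf, denseDf=denseDf,
#                                                    seqDf=seqDf, use_softmax=True)
#   # train_set / vail_set are batched tf.data.Dataset objects ready for model.fit
# ------------------------------------------------------------------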
317 | def df_prepare(self,sparseInfo:list=None,denseInfo:list=None,seqInfo:list=None): 318 | df_name=[] 319 | inputs=[[],[],[]] 320 | if denseInfo: 321 | dense_inputs=self.dense_prepare(denseInfo) 322 | df_name+=[info_.fea_name for info_ in denseInfo] 323 | inputs[0]=dense_inputs 324 | if sparseInfo: 325 | sparse_inputs=self.sparse_prepare(sparseInfo) 326 | df_name+=[info_.fea_name for info_ in sparseInfo] 327 | inputs[1]=sparse_inputs 328 | if seqInfo: 329 | seq_inputs=self.sparse_prepare(seqInfo) 330 | df_name+=[info_.fea_name for info_ in seqInfo] 331 | inputs[2]=seq_inputs 332 | 333 | return inputs 334 | 335 | def data_pipeline(self,dataSet:tuple): 336 | dataSet=tf.data.Dataset.from_tensor_slices(dataSet) 337 | return dataSet.shuffle(2048).repeat(2).batch(batch_size=self.batch_size).prefetch(2) 338 | 339 | def extract_train_test(self,train_idx, test_idx,targetDf,sparseDf=None, denseDf=None,seqDf=None,use_softmax=True): 340 | try: 341 | train_dense = denseDf.loc[train_idx] 342 | test_dense = denseDf.loc[test_idx] 343 | except AttributeError: 344 | train_dense,test_dense=None,None 345 | 346 | try: 347 | train_sparse = sparseDf.loc[train_idx] 348 | test_sparse = sparseDf.loc[test_idx] 349 | except AttributeError: 350 | train_sparse, test_sparse = None, None 351 | 352 | try: 353 | train_seq={key:seqDf[key][train_idx] for key in seqDf} 354 | test_seq={key:seqDf[key][test_idx] for key in seqDf} 355 | except TypeError: 356 | train_seq,test_seq= {}, {} 357 | 358 | if use_softmax: 359 | targetDf=tf.keras.utils.to_categorical(targetDf.values.tolist()) 360 | else: 361 | targetDf=targetDf.values 362 | 363 | y_train=targetDf[train_idx] 364 | y_test=targetDf[test_idx] 365 | 366 | train_df=self.df_format_input([train_dense,train_sparse]) 367 | test_df=self.df_format_input([test_dense,test_sparse]) 368 | train_df.update(train_seq) 369 | test_df.update(test_seq) 370 | 371 | if self.batch_size!=None: # draw one index set per split so features and labels stay aligned 372 | train_idx_=np.random.choice(len(y_train),size=(len(y_train)//self.batch_size)*self.batch_size,replace=False) 373 | test_idx_=np.random.choice(len(y_test),size=(len(y_test)//self.batch_size)*self.batch_size,replace=False) 374 | train_df,y_train=self.input_loc(train_df,use_idx=train_idx_),self.input_loc(y_train,use_idx=train_idx_) 375 | test_df,y_test=self.input_loc(test_df,use_idx=test_idx_),self.input_loc(y_test,use_idx=test_idx_) 376 | 377 | train=self.data_pipeline((train_df,y_train)) 378 | test=self.data_pipeline((test_df,y_test)) 379 | 380 | return train,test 381 | 382 | def input_loc(self,df,use_idx:list): 383 | ''' 384 | :param df: formatted df (dict of arrays or a single array) 385 | :param use_idx: indices to select, e.g. a k-fold split 386 | :return: df[use_idx] 387 | ''' 388 | if isinstance(df, dict): 389 | return {key: np.array(df[key])[use_idx] for key in df} 390 | else: 391 | return df[use_idx] 392 | 393 | def static_batch(self,df): 394 | if isinstance(df,dict): 395 | df_num=np.array(df[list(df.keys())[0]]).shape[0] 396 | else: 397 | df_num=len(df) 398 | 399 | batch_num = (df_num // self.batch_size) * self.batch_size 400 | need_idx = np.random.choice(df_num, size=batch_num, replace=False) 401 | if self.use_shuffle: 402 | shuffle(need_idx) 403 | 404 | df = self.input_loc(df, use_idx=need_idx) 405 | 406 | return df 407 | 408 | def split_val_set(self,train_df,y_train,train_index,val_index): 409 | train_x = self.input_loc(df=train_df, use_idx=train_index) 410 | train_y = self.input_loc(df=y_train, use_idx=train_index) 411 | val_x = self.input_loc(df=train_df, use_idx=val_index) 412 | val_y = self.input_loc(df=y_train, use_idx=val_index) 413 | 414 | return train_x,train_y,(val_x,val_y) -------------------------------------------------------------------------------- /kon/wrapper/Feature_Columns.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # _*_ 
coding:utf-8 _*_ 3 | '''================================= 4 | @Author :tix_hjq 5 | @Date :2020/9/30 下午7:56 6 | @File :Feature_Columns.py 7 | @email :hjq1922451756@gmail.com or 1922451756@qq.com 8 | =================================''' 9 | from collections import namedtuple 10 | 11 | class NumsFeaWrapper(object): 12 | def __init__(self): 13 | self.denseFea=namedtuple('denseFea',['fea_name','batch_size']) 14 | 15 | class SparseFeaWrapper(object): 16 | def __init__(self): 17 | self.sparseFeature=namedtuple('sparseFea', 18 | ['fea_name', 'word_size', 'input_dim', 'cross_unit', 'linear_unit', 'pre_weight', 'mask_zero', 19 | 'is_trainable', 'input_length', 'sample_num', 'batch_size', 'emb_reg']) 20 | 21 | class NumsFea(NumsFeaWrapper): 22 | def __init__(self): 23 | super(NumsFea, self).__init__() 24 | self.fea=self.denseFea 25 | 26 | class CateFea(SparseFeaWrapper): 27 | def __init__(self): 28 | super(CateFea, self).__init__() 29 | self.fea=self.sparseFeature 30 | 31 | class BehaviorFea(SparseFeaWrapper): 32 | def __init__(self): 33 | super(BehaviorFea, self).__init__() 34 | self.fea = self.sparseFeature 35 | 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /kon/wrapper/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # _*_ coding:utf-8 _*_ 3 | '''================================= 4 | @Author :tix_hjq 5 | @Date :2020/9/30 下午7:55 6 | @File :__init__.py.py 7 | @email :hjq1922451756@gmail.com or 1922451756@qq.com 8 | =================================''' 9 | from sklearn.model_selection import KFold, StratifiedKFold 10 | from sklearn.metrics import mean_squared_error as mse 11 | from sklearn.preprocessing import LabelEncoder 12 | from sklearn.metrics import f1_score, r2_score 13 | from hyperopt import fmin, tpe, hp, partial 14 | from numpy.random import random, shuffle 15 | import matplotlib.pyplot as plt 16 | from pandas import DataFrame 17 | import tensorflow as tf 18 | from tqdm import tqdm 19 | from PIL import Image 20 | import lightgbm as lgb 21 | import networkx as nx 22 | import pandas as pd 23 | import numpy as np 24 | import warnings 25 | import cv2 26 | import os 27 | import gc 28 | import re 29 | import datetime 30 | import sys 31 | from kon.model.embedding.setence_model import * 32 | from kon.model.feature_eng.feature_transform import feature_tool 33 | from kon.model.feature_eng.base_model import base_model 34 | from kon.model.ctr_model.model.models import * -------------------------------------------------------------------------------- /paper/Next Read/A Convolutional Click Prediction Model.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/A Convolutional Click Prediction Model.pdf -------------------------------------------------------------------------------- /paper/Next Read/[DSSM] Learning Deep Structured Semantic Models for Web Search using Clickthrough Data (UIUC 2013).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[DSSM] Learning Deep Structured Semantic Models for Web Search using Clickthrough Data (UIUC 2013).pdf -------------------------------------------------------------------------------- /paper/Next Read/[ESMM] Entire Space Multi-Task Model - An 
Effective Approach for Estimating Post-Click Conversion Rate (Alibaba 2018).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[ESMM] Entire Space Multi-Task Model - An Effective Approach for Estimating Post-Click Conversion Rate (Alibaba 2018).pdf -------------------------------------------------------------------------------- /paper/Next Read/[FAT-DeepFFM]Field Attentive Deep Field-aware Factorization Machine[2019].pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[FAT-DeepFFM]Field Attentive Deep Field-aware Factorization Machine[2019].pdf -------------------------------------------------------------------------------- /paper/Next Read/[FGCNN]Feature Generation by Convolutional Neural Network forClick-Through Rate Predicti[2019].pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[FGCNN]Feature Generation by Convolutional Neural Network forClick-Through Rate Predicti[2019].pdf -------------------------------------------------------------------------------- /paper/Next Read/[FLEN] Leveraging Field for Scalable CTR Predicti[2019].pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[FLEN] Leveraging Field for Scalable CTR Predicti[2019].pdf -------------------------------------------------------------------------------- /paper/Next Read/[FTRL] Ad Click Prediction a View from the Trenches (Google 2013).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[FTRL] Ad Click Prediction a View from the Trenches (Google 2013).pdf -------------------------------------------------------------------------------- /paper/Next Read/[Fi-GNN]Modeling Feature Interactions via Graph Neural Networks for CTR Prediction[2019].pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[Fi-GNN]Modeling Feature Interactions via Graph Neural Networks for CTR Prediction[2019].pdf -------------------------------------------------------------------------------- /paper/Next Read/[FiBiNET]Combining Feature Importance and Bilinear featureInteraction for Click-Through Rate Predict[2019].pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[FiBiNET]Combining Feature Importance and Bilinear featureInteraction for Click-Through Rate Predict[2019].pdf -------------------------------------------------------------------------------- /paper/Next Read/[GBDT+LR] Practical Lessons from Predicting Clicks on Ads at Facebook (Facebook 2014).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[GBDT+LR] 
Practical Lessons from Predicting Clicks on Ads at Facebook (Facebook 2014).pdf -------------------------------------------------------------------------------- /paper/Next Read/[Image CTR] Image Matters - Visually modeling user behaviors using Advanced Model Server (Alibaba 2018).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[Image CTR] Image Matters - Visually modeling user behaviors using Advanced Model Server (Alibaba 2018).pdf -------------------------------------------------------------------------------- /paper/Next Read/[MINDN]Multi-Interest Network with Dynamic Routing for Recommendation at Tmall[2019].pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[MINDN]Multi-Interest Network with Dynamic Routing for Recommendation at Tmall[2019].pdf -------------------------------------------------------------------------------- /paper/Next Read/[OENN]Order-aware Embedding Neural Network for CTR Predicti[2019].pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[OENN]Order-aware Embedding Neural Network for CTR Predicti[2019].pdf -------------------------------------------------------------------------------- /paper/Next Read/[ONN]Operation-aware Neural Networks for User Response Prediction[2019].pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[ONN]Operation-aware Neural Networks for User Response Prediction[2019].pdf -------------------------------------------------------------------------------- /paper/Next Read/[PS-PLM] Learning Piece-wise Linear Models from Large Scale Data for Ad Click Prediction (Alibaba 2017).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[PS-PLM] Learning Piece-wise Linear Models from Large Scale Data for Ad Click Prediction (Alibaba 2017).pdf -------------------------------------------------------------------------------- /paper/Next Read/[RLAC]Representation Learning-Assisted Click-Through Rate Prediction[2019].pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[RLAC]Representation Learning-Assisted Click-Through Rate Prediction[2019].pdf -------------------------------------------------------------------------------- /paper/Next Read/[Warm Up Cold-start Advertisements]Improving CTR Predictions via Learning to Learn ID Embeddings[2019].pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/Next Read/[Warm Up Cold-start Advertisements]Improving CTR Predictions via Learning to Learn ID Embeddings[2019].pdf -------------------------------------------------------------------------------- /paper/README: 
-------------------------------------------------------------------------------- 1 | paper collection 2 | -------------------------------------------------------------------------------- /paper/behavior/[ALSH]Asymmetric LSH for Sublinear Time Maximum Inner Product Search[2014].pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/behavior/[ALSH]Asymmetric LSH for Sublinear Time Maximum Inner Product Search[2014].pdf -------------------------------------------------------------------------------- /paper/behavior/[BST]Behavior Sequence Transformer for E-commerce Recommendation in Alibaba[2019].pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/behavior/[BST]Behavior Sequence Transformer for E-commerce Recommendation in Alibaba[2019].pdf -------------------------------------------------------------------------------- /paper/behavior/[DIEN] Deep Interest Evolution Network for Click-Through Rate Prediction (Alibaba 2019).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/behavior/[DIEN] Deep Interest Evolution Network for Click-Through Rate Prediction (Alibaba 2019).pdf -------------------------------------------------------------------------------- /paper/behavior/[DIN] Deep Interest Network for Click-Through Rate Prediction (Alibaba 2018).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/behavior/[DIN] Deep Interest Network for Click-Through Rate Prediction (Alibaba 2018).pdf -------------------------------------------------------------------------------- /paper/behavior/[DSIN]Deep Session Interest Network for Click-Through Rate Predicti[2019].pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/behavior/[DSIN]Deep Session Interest Network for Click-Through Rate Predicti[2019].pdf -------------------------------------------------------------------------------- /paper/behavior/[DSTN]Deep Spatio-Temporal Neural Networks for Click-Through Rate Prediction[2019].pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/behavior/[DSTN]Deep Spatio-Temporal Neural Networks for Click-Through Rate Prediction[2019].pdf -------------------------------------------------------------------------------- /paper/behavior/[DTSF]Deep Time-Stream Framework for Click-Through Rate Prediction by Tracking Interest Evolution[2020].pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/behavior/[DTSF]Deep Time-Stream Framework for Click-Through Rate Prediction by Tracking Interest Evolution[2020].pdf -------------------------------------------------------------------------------- /paper/behavior/[LSM]Lifelong Sequential Modeling with Personalized Memorization for User Response Prediction[2019].pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/behavior/[LSM]Lifelong Sequential Modeling with Personalized Memorization for User Response Prediction[2019].pdf -------------------------------------------------------------------------------- /paper/behavior/[MIMN]Practice on Long Sequential User Behavior Modeling for Click-Through Rate Prediction[2019].pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/behavior/[MIMN]Practice on Long Sequential User Behavior Modeling for Click-Through Rate Prediction[2019].pdf -------------------------------------------------------------------------------- /paper/behavior/[NTM]Neural Turing Machines[2014].pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/behavior/[NTM]Neural Turing Machines[2014].pdf -------------------------------------------------------------------------------- /paper/behavior/[NTM]The_NTM_Introduction_And_Implementation[2017].pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/behavior/[NTM]The_NTM_Introduction_And_Implementation[2017].pdf -------------------------------------------------------------------------------- /paper/behavior/[REFORMER] THE EFFICIENT TRANSFORMER[2020].pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/behavior/[REFORMER] THE EFFICIENT TRANSFORMER[2020].pdf -------------------------------------------------------------------------------- /paper/behavior/[SIM]Search-based User Interest Modeling with Lifelong Sequential Behavior Data for Click-Through Rate Prediction[2020].pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/behavior/[SIM]Search-based User Interest Modeling with Lifelong Sequential Behavior Data for Click-Through Rate Prediction[2020].pdf -------------------------------------------------------------------------------- /paper/behavior/[Self-Attention]Attention is all you need(Google 2017).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/behavior/[Self-Attention]Attention is all you need(Google 2017).pdf -------------------------------------------------------------------------------- /paper/behavior/[SeqFM]Sequence-Aware Factorization Machines(2019).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/behavior/[SeqFM]Sequence-Aware Factorization Machines(2019).pdf -------------------------------------------------------------------------------- /paper/interactive/[AFM] Attentional Factorization Machines - Learning the Weight of Feature Interactions via Attention Networks (ZJU 2017).pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/interactive/[AFM] Attentional Factorization Machines - Learning the Weight of Feature Interactions via Attention Networks (ZJU 2017).pdf -------------------------------------------------------------------------------- /paper/interactive/[AutoInt] AutoInt Automatic Feature Interaction Learning via Self-Attentive Neural Networks(CIKM 2019).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/interactive/[AutoInt] AutoInt Automatic Feature Interaction Learning via Self-Attentive Neural Networks(CIKM 2019).pdf -------------------------------------------------------------------------------- /paper/interactive/[DCN] Deep & Cross Network for Ad Click Predictions (Stanford 2017).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/interactive/[DCN] Deep & Cross Network for Ad Click Predictions (Stanford 2017).pdf -------------------------------------------------------------------------------- /paper/interactive/[Deep Crossing] Deep Crossing - Web-Scale Modeling without Manually Crafted Combinatorial Features (Microsoft 2016).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/interactive/[Deep Crossing] Deep Crossing - Web-Scale Modeling without Manually Crafted Combinatorial Features (Microsoft 2016).pdf -------------------------------------------------------------------------------- /paper/interactive/[DeepFM] A Factorization-Machine based Neural Network for CTR Prediction (HIT-Huawei 2017).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/interactive/[DeepFM] A Factorization-Machine based Neural Network for CTR Prediction (HIT-Huawei 2017).pdf -------------------------------------------------------------------------------- /paper/interactive/[FFM] Field-aware Factorization Machines for CTR Prediction (Criteo 2016).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/interactive/[FFM] Field-aware Factorization Machines for CTR Prediction (Criteo 2016).pdf -------------------------------------------------------------------------------- /paper/interactive/[FM] Fast Context-aware Recommendations with Factorization Machines (UKON 2011).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/interactive/[FM] Fast Context-aware Recommendations with Factorization Machines (UKON 2011).pdf -------------------------------------------------------------------------------- /paper/interactive/[FNN] Deep Learning over Multi-field Categorical Data (UCL 2016).pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/interactive/[FNN] Deep Learning over Multi-field Categorical Data (UCL 2016).pdf -------------------------------------------------------------------------------- /paper/interactive/[LR] Predicting Clicks - Estimating the Click-Through Rate for New Ads (Microsoft 2007).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/interactive/[LR] Predicting Clicks - Estimating the Click-Through Rate for New Ads (Microsoft 2007).pdf -------------------------------------------------------------------------------- /paper/interactive/[NFM] Neural Factorization Machines for Sparse Predictive Analytics (NUS 2017).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/interactive/[NFM] Neural Factorization Machines for Sparse Predictive Analytics (NUS 2017).pdf -------------------------------------------------------------------------------- /paper/interactive/[PNN] Product-based Neural Networks for User Response Prediction (SJTU 2016).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/interactive/[PNN] Product-based Neural Networks for User Response Prediction (SJTU 2016).pdf -------------------------------------------------------------------------------- /paper/interactive/[Wide & Deep] Wide & Deep Learning for Recommender Systems (Google 2016).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/interactive/[Wide & Deep] Wide & Deep Learning for Recommender Systems (Google 2016).pdf -------------------------------------------------------------------------------- /paper/interactive/[xDeepFM] xDeepFM - Combining Explicit and Implicit Feature Interactions for Recommender Systems (USTC 2018).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIXhjq/ML_Function/bbb85327151257e40526ebd35e34fe4f1b0d9398/paper/interactive/[xDeepFM] xDeepFM - Combining Explicit and Implicit Feature Interactions for Recommender Systems (USTC 2018).pdf --------------------------------------------------------------------------------