├── .gitignore
├── LICENSE
├── README.md
├── data
│   ├── .gitkeep
│   └── query.sql
├── models
│   └── .gitkeep
├── requirements.txt
└── src
    ├── .gitkeep
    ├── app
    │   ├── api.go
    │   ├── db
    │   │   └── db.go
    │   ├── go.mod
    │   └── go.sum
    ├── feature_store
    │   ├── exec.sh
    │   ├── execute.py
    │   ├── fs_general.sql
    │   ├── fs_horario.sql
    │   ├── fs_points.sql
    │   ├── fs_produtos.sql
    │   └── fs_transacoes.sql
    ├── pipeline.sh
    ├── predict
    │   ├── etl.sql
    │   ├── predict.py
    │   └── profile_user.py
    ├── rfv
    │   ├── analise_freq_valor.py
    │   └── analise_recencia.py
    └── train
        ├── abt.sql
        ├── export_abt.py
        ├── semma_ex.py
        ├── train.py
        └── train_mlflow.py
/.gitignore:
--------------------------------------------------------------------------------
__pycache__
*.db
*.csv
*.xlsx
*.pkl
dsenv/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Attribution-NonCommercial-ShareAlike 4.0 International 2 | 3 | ======================================================================= 4 | 5 | Creative Commons Corporation ("Creative Commons") is not a law firm and 6 | does not provide legal services or legal advice. Distribution of 7 | Creative Commons public licenses does not create a lawyer-client or 8 | other relationship. Creative Commons makes its licenses and related 9 | information available on an "as-is" basis. Creative Commons gives no 10 | warranties regarding its licenses, any material licensed under their 11 | terms and conditions, or any related information. Creative Commons 12 | disclaims all liability for damages resulting from their use to the 13 | fullest extent possible. 14 | 15 | Using Creative Commons Public Licenses 16 | 17 | Creative Commons public licenses provide a standard set of terms and 18 | conditions that creators and other rights holders may use to share 19 | original works of authorship and other material subject to copyright 20 | and certain other rights specified in the public license below. The 21 | following considerations are for informational purposes only, are not 22 | exhaustive, and do not form part of our licenses. 23 | 24 | Considerations for licensors: Our public licenses are 25 | intended for use by those authorized to give the public 26 | permission to use material in ways otherwise restricted by 27 | copyright and certain other rights. Our licenses are 28 | irrevocable. Licensors should read and understand the terms 29 | and conditions of the license they choose before applying it. 30 | Licensors should also secure all rights necessary before 31 | applying our licenses so that the public can reuse the 32 | material as expected. Licensors should clearly mark any 33 | material not subject to the license. This includes other CC- 34 | licensed material, or material used under an exception or 35 | limitation to copyright. More considerations for licensors: 36 | wiki.creativecommons.org/Considerations_for_licensors 37 | 38 | Considerations for the public: By using one of our public 39 | licenses, a licensor grants the public permission to use the 40 | licensed material under specified terms and conditions. If 41 | the licensor's permission is not necessary for any reason--for 42 | example, because of any applicable exception or limitation to 43 | copyright--then that use is not regulated by the license. Our 44 | licenses grant only permissions under copyright and certain 45 | other rights that a licensor has authority to grant.
Use of 46 | the licensed material may still be restricted for other 47 | reasons, including because others have copyright or other 48 | rights in the material. A licensor may make special requests, 49 | such as asking that all changes be marked or described. 50 | Although not required by our licenses, you are encouraged to 51 | respect those requests where reasonable. More considerations 52 | for the public: 53 | wiki.creativecommons.org/Considerations_for_licensees 54 | 55 | ======================================================================= 56 | 57 | Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International 58 | Public License 59 | 60 | By exercising the Licensed Rights (defined below), You accept and agree 61 | to be bound by the terms and conditions of this Creative Commons 62 | Attribution-NonCommercial-ShareAlike 4.0 International Public License 63 | ("Public License"). To the extent this Public License may be 64 | interpreted as a contract, You are granted the Licensed Rights in 65 | consideration of Your acceptance of these terms and conditions, and the 66 | Licensor grants You such rights in consideration of benefits the 67 | Licensor receives from making the Licensed Material available under 68 | these terms and conditions. 69 | 70 | 71 | Section 1 -- Definitions. 72 | 73 | a. Adapted Material means material subject to Copyright and Similar 74 | Rights that is derived from or based upon the Licensed Material 75 | and in which the Licensed Material is translated, altered, 76 | arranged, transformed, or otherwise modified in a manner requiring 77 | permission under the Copyright and Similar Rights held by the 78 | Licensor. For purposes of this Public License, where the Licensed 79 | Material is a musical work, performance, or sound recording, 80 | Adapted Material is always produced where the Licensed Material is 81 | synched in timed relation with a moving image. 82 | 83 | b. Adapter's License means the license You apply to Your Copyright 84 | and Similar Rights in Your contributions to Adapted Material in 85 | accordance with the terms and conditions of this Public License. 86 | 87 | c. BY-NC-SA Compatible License means a license listed at 88 | creativecommons.org/compatiblelicenses, approved by Creative 89 | Commons as essentially the equivalent of this Public License. 90 | 91 | d. Copyright and Similar Rights means copyright and/or similar rights 92 | closely related to copyright including, without limitation, 93 | performance, broadcast, sound recording, and Sui Generis Database 94 | Rights, without regard to how the rights are labeled or 95 | categorized. For purposes of this Public License, the rights 96 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 97 | Rights. 98 | 99 | e. Effective Technological Measures means those measures that, in the 100 | absence of proper authority, may not be circumvented under laws 101 | fulfilling obligations under Article 11 of the WIPO Copyright 102 | Treaty adopted on December 20, 1996, and/or similar international 103 | agreements. 104 | 105 | f. Exceptions and Limitations means fair use, fair dealing, and/or 106 | any other exception or limitation to Copyright and Similar Rights 107 | that applies to Your use of the Licensed Material. 108 | 109 | g. License Elements means the license attributes listed in the name 110 | of a Creative Commons Public License. The License Elements of this 111 | Public License are Attribution, NonCommercial, and ShareAlike. 112 | 113 | h. 
Licensed Material means the artistic or literary work, database, 114 | or other material to which the Licensor applied this Public 115 | License. 116 | 117 | i. Licensed Rights means the rights granted to You subject to the 118 | terms and conditions of this Public License, which are limited to 119 | all Copyright and Similar Rights that apply to Your use of the 120 | Licensed Material and that the Licensor has authority to license. 121 | 122 | j. Licensor means the individual(s) or entity(ies) granting rights 123 | under this Public License. 124 | 125 | k. NonCommercial means not primarily intended for or directed towards 126 | commercial advantage or monetary compensation. For purposes of 127 | this Public License, the exchange of the Licensed Material for 128 | other material subject to Copyright and Similar Rights by digital 129 | file-sharing or similar means is NonCommercial provided there is 130 | no payment of monetary compensation in connection with the 131 | exchange. 132 | 133 | l. Share means to provide material to the public by any means or 134 | process that requires permission under the Licensed Rights, such 135 | as reproduction, public display, public performance, distribution, 136 | dissemination, communication, or importation, and to make material 137 | available to the public including in ways that members of the 138 | public may access the material from a place and at a time 139 | individually chosen by them. 140 | 141 | m. Sui Generis Database Rights means rights other than copyright 142 | resulting from Directive 96/9/EC of the European Parliament and of 143 | the Council of 11 March 1996 on the legal protection of databases, 144 | as amended and/or succeeded, as well as other essentially 145 | equivalent rights anywhere in the world. 146 | 147 | n. You means the individual or entity exercising the Licensed Rights 148 | under this Public License. Your has a corresponding meaning. 149 | 150 | 151 | Section 2 -- Scope. 152 | 153 | a. License grant. 154 | 155 | 1. Subject to the terms and conditions of this Public License, 156 | the Licensor hereby grants You a worldwide, royalty-free, 157 | non-sublicensable, non-exclusive, irrevocable license to 158 | exercise the Licensed Rights in the Licensed Material to: 159 | 160 | a. reproduce and Share the Licensed Material, in whole or 161 | in part, for NonCommercial purposes only; and 162 | 163 | b. produce, reproduce, and Share Adapted Material for 164 | NonCommercial purposes only. 165 | 166 | 2. Exceptions and Limitations. For the avoidance of doubt, where 167 | Exceptions and Limitations apply to Your use, this Public 168 | License does not apply, and You do not need to comply with 169 | its terms and conditions. 170 | 171 | 3. Term. The term of this Public License is specified in Section 172 | 6(a). 173 | 174 | 4. Media and formats; technical modifications allowed. The 175 | Licensor authorizes You to exercise the Licensed Rights in 176 | all media and formats whether now known or hereafter created, 177 | and to make technical modifications necessary to do so. The 178 | Licensor waives and/or agrees not to assert any right or 179 | authority to forbid You from making technical modifications 180 | necessary to exercise the Licensed Rights, including 181 | technical modifications necessary to circumvent Effective 182 | Technological Measures. For purposes of this Public License, 183 | simply making modifications authorized by this Section 2(a) 184 | (4) never produces Adapted Material. 185 | 186 | 5. 
Downstream recipients. 187 | 188 | a. Offer from the Licensor -- Licensed Material. Every 189 | recipient of the Licensed Material automatically 190 | receives an offer from the Licensor to exercise the 191 | Licensed Rights under the terms and conditions of this 192 | Public License. 193 | 194 | b. Additional offer from the Licensor -- Adapted Material. 195 | Every recipient of Adapted Material from You 196 | automatically receives an offer from the Licensor to 197 | exercise the Licensed Rights in the Adapted Material 198 | under the conditions of the Adapter's License You apply. 199 | 200 | c. No downstream restrictions. You may not offer or impose 201 | any additional or different terms or conditions on, or 202 | apply any Effective Technological Measures to, the 203 | Licensed Material if doing so restricts exercise of the 204 | Licensed Rights by any recipient of the Licensed 205 | Material. 206 | 207 | 6. No endorsement. Nothing in this Public License constitutes or 208 | may be construed as permission to assert or imply that You 209 | are, or that Your use of the Licensed Material is, connected 210 | with, or sponsored, endorsed, or granted official status by, 211 | the Licensor or others designated to receive attribution as 212 | provided in Section 3(a)(1)(A)(i). 213 | 214 | b. Other rights. 215 | 216 | 1. Moral rights, such as the right of integrity, are not 217 | licensed under this Public License, nor are publicity, 218 | privacy, and/or other similar personality rights; however, to 219 | the extent possible, the Licensor waives and/or agrees not to 220 | assert any such rights held by the Licensor to the limited 221 | extent necessary to allow You to exercise the Licensed 222 | Rights, but not otherwise. 223 | 224 | 2. Patent and trademark rights are not licensed under this 225 | Public License. 226 | 227 | 3. To the extent possible, the Licensor waives any right to 228 | collect royalties from You for the exercise of the Licensed 229 | Rights, whether directly or through a collecting society 230 | under any voluntary or waivable statutory or compulsory 231 | licensing scheme. In all other cases the Licensor expressly 232 | reserves any right to collect such royalties, including when 233 | the Licensed Material is used other than for NonCommercial 234 | purposes. 235 | 236 | 237 | Section 3 -- License Conditions. 238 | 239 | Your exercise of the Licensed Rights is expressly made subject to the 240 | following conditions. 241 | 242 | a. Attribution. 243 | 244 | 1. If You Share the Licensed Material (including in modified 245 | form), You must: 246 | 247 | a. retain the following if it is supplied by the Licensor 248 | with the Licensed Material: 249 | 250 | i. identification of the creator(s) of the Licensed 251 | Material and any others designated to receive 252 | attribution, in any reasonable manner requested by 253 | the Licensor (including by pseudonym if 254 | designated); 255 | 256 | ii. a copyright notice; 257 | 258 | iii. a notice that refers to this Public License; 259 | 260 | iv. a notice that refers to the disclaimer of 261 | warranties; 262 | 263 | v. a URI or hyperlink to the Licensed Material to the 264 | extent reasonably practicable; 265 | 266 | b. indicate if You modified the Licensed Material and 267 | retain an indication of any previous modifications; and 268 | 269 | c. indicate the Licensed Material is licensed under this 270 | Public License, and include the text of, or the URI or 271 | hyperlink to, this Public License. 272 | 273 | 2. 
You may satisfy the conditions in Section 3(a)(1) in any 274 | reasonable manner based on the medium, means, and context in 275 | which You Share the Licensed Material. For example, it may be 276 | reasonable to satisfy the conditions by providing a URI or 277 | hyperlink to a resource that includes the required 278 | information. 279 | 3. If requested by the Licensor, You must remove any of the 280 | information required by Section 3(a)(1)(A) to the extent 281 | reasonably practicable. 282 | 283 | b. ShareAlike. 284 | 285 | In addition to the conditions in Section 3(a), if You Share 286 | Adapted Material You produce, the following conditions also apply. 287 | 288 | 1. The Adapter's License You apply must be a Creative Commons 289 | license with the same License Elements, this version or 290 | later, or a BY-NC-SA Compatible License. 291 | 292 | 2. You must include the text of, or the URI or hyperlink to, the 293 | Adapter's License You apply. You may satisfy this condition 294 | in any reasonable manner based on the medium, means, and 295 | context in which You Share Adapted Material. 296 | 297 | 3. You may not offer or impose any additional or different terms 298 | or conditions on, or apply any Effective Technological 299 | Measures to, Adapted Material that restrict exercise of the 300 | rights granted under the Adapter's License You apply. 301 | 302 | 303 | Section 4 -- Sui Generis Database Rights. 304 | 305 | Where the Licensed Rights include Sui Generis Database Rights that 306 | apply to Your use of the Licensed Material: 307 | 308 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right 309 | to extract, reuse, reproduce, and Share all or a substantial 310 | portion of the contents of the database for NonCommercial purposes 311 | only; 312 | 313 | b. if You include all or a substantial portion of the database 314 | contents in a database in which You have Sui Generis Database 315 | Rights, then the database in which You have Sui Generis Database 316 | Rights (but not its individual contents) is Adapted Material, 317 | including for purposes of Section 3(b); and 318 | 319 | c. You must comply with the conditions in Section 3(a) if You Share 320 | all or a substantial portion of the contents of the database. 321 | 322 | For the avoidance of doubt, this Section 4 supplements and does not 323 | replace Your obligations under this Public License where the Licensed 324 | Rights include other Copyright and Similar Rights. 325 | 326 | 327 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 328 | 329 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 330 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 331 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 332 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 333 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 334 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 335 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 336 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 337 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT 338 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 339 | 340 | b. 
TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 341 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 342 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 343 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 344 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 345 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 346 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 347 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 348 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 349 | 350 | c. The disclaimer of warranties and limitation of liability provided 351 | above shall be interpreted in a manner that, to the extent 352 | possible, most closely approximates an absolute disclaimer and 353 | waiver of all liability. 354 | 355 | 356 | Section 6 -- Term and Termination. 357 | 358 | a. This Public License applies for the term of the Copyright and 359 | Similar Rights licensed here. However, if You fail to comply with 360 | this Public License, then Your rights under this Public License 361 | terminate automatically. 362 | 363 | b. Where Your right to use the Licensed Material has terminated under 364 | Section 6(a), it reinstates: 365 | 366 | 1. automatically as of the date the violation is cured, provided 367 | it is cured within 30 days of Your discovery of the 368 | violation; or 369 | 370 | 2. upon express reinstatement by the Licensor. 371 | 372 | For the avoidance of doubt, this Section 6(b) does not affect any 373 | right the Licensor may have to seek remedies for Your violations 374 | of this Public License. 375 | 376 | c. For the avoidance of doubt, the Licensor may also offer the 377 | Licensed Material under separate terms or conditions or stop 378 | distributing the Licensed Material at any time; however, doing so 379 | will not terminate this Public License. 380 | 381 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public 382 | License. 383 | 384 | 385 | Section 7 -- Other Terms and Conditions. 386 | 387 | a. The Licensor shall not be bound by any additional or different 388 | terms or conditions communicated by You unless expressly agreed. 389 | 390 | b. Any arrangements, understandings, or agreements regarding the 391 | Licensed Material not stated herein are separate from and 392 | independent of the terms and conditions of this Public License. 393 | 394 | 395 | Section 8 -- Interpretation. 396 | 397 | a. For the avoidance of doubt, this Public License does not, and 398 | shall not be interpreted to, reduce, limit, restrict, or impose 399 | conditions on any use of the Licensed Material that could lawfully 400 | be made without permission under this Public License. 401 | 402 | b. To the extent possible, if any provision of this Public License is 403 | deemed unenforceable, it shall be automatically reformed to the 404 | minimum extent necessary to make it enforceable. If the provision 405 | cannot be reformed, it shall be severed from this Public License 406 | without affecting the enforceability of the remaining terms and 407 | conditions. 408 | 409 | c. No term or condition of this Public License will be waived and no 410 | failure to comply consented to unless expressly agreed to by the 411 | Licensor. 412 | 413 | d. 
Nothing in this Public License constitutes or may be interpreted 414 | as a limitation upon, or waiver of, any privileges and immunities 415 | that apply to the Licensor or You, including from the legal 416 | processes of any jurisdiction or authority. 417 | 418 | ======================================================================= 419 | 420 | Creative Commons is not a party to its public 421 | licenses. Notwithstanding, Creative Commons may elect to apply one of 422 | its public licenses to material it publishes and in those instances 423 | will be considered the “Licensor.” The text of the Creative Commons 424 | public licenses is dedicated to the public domain under the CC0 Public 425 | Domain Dedication. Except for the limited purpose of indicating that 426 | material is shared under a Creative Commons public license or as 427 | otherwise permitted by the Creative Commons policies published at 428 | creativecommons.org/policies, Creative Commons does not authorize the 429 | use of the trademark "Creative Commons" or any other trademark or logo 430 | of Creative Commons without its prior written consent including, 431 | without limitation, in connection with any unauthorized modifications 432 | to any of its public licenses or any other arrangements, 433 | understandings, or agreements concerning use of licensed material. For 434 | the avoidance of doubt, this paragraph does not form part of the 435 | public licenses. 436 | 437 | Creative Commons may be contacted at creativecommons.org. 438 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Data Science & Points

[![CC BY-NC-SA 4.0][cc-by-nc-sa-shield]][cc-by-nc-sa]

An applied Data Science project from start to finish: a complete pipeline for a data solution.

- [About](#about)
- [Context](#context)
- [Stages](#stages)
- [Prerequisites](#prerequisites)
- [Challenge](#challenge)
- [About the author](#about-the-author)
- [How to support](#support-this-initiative)

This material is licensed under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License][cc-by-nc-sa].

[![CC BY-NC-SA 4.0][cc-by-nc-sa-image]][cc-by-nc-sa]

[cc-by-nc-sa]: http://creativecommons.org/licenses/by-nc-sa/4.0/
[cc-by-nc-sa-image]: https://licensebuttons.net/l/by-nc-sa/4.0/88x31.png
[cc-by-nc-sa-shield]: https://img.shields.io/badge/License-CC%20BY--NC--SA%204.0-lightgrey.svg

## About
We built a complete Data Science solution, applying Machine Learning techniques to a specific business problem.

Everything was developed live on the [Téo Me Why](https://teomewhy.org) channel and made available to our Twitch Subs and YouTube Members.

Subscribe here: [Twitch](https://www.twitch.tv/collections/jg9itHOO1ReLcw) / [YouTube](https://www.youtube.com/playlist?list=PLvlkVRRKOYFQOkwDvfgCvKi9-I1jQXiy7)

### Context
We have data on the users of the channel's points system. Based on it, we want to identify actions and data products that increase user engagement.

With that in mind, we built a Data Science project that covers every stage needed to build a data product.
### Stages
- Building the feature store;
- Processing the snapshots (safras);
- Building the response variable;
- Building the ABT (*Analytical Base Table*);
- Training predictive models;
- Deployment;

### Prerequisites

#### Subjects

To get the most out of this project, it is worth checking the following completely free playlists:

- [Git/GitHub](https://www.youtube.com/playlist?list=PLvlkVRRKOYFQ3cfYPjLeQ0KvrQ8bG5H11)
- [Python](https://www.youtube.com/playlist?list=PLvlkVRRKOYFRXdquucikNbwYeFzzzYIGb)
- [Pandas](https://www.youtube.com/playlist?list=PLvlkVRRKOYFSl-XCxNQ1u3uOLvDnYxupG)
- [Statistics](https://www.youtube.com/playlist?list=PLvlkVRRKOYFSWIyhwq4Nu8sNd_GfOi1tj)
- [Machine Learning](https://www.youtube.com/playlist?list=PLvlkVRRKOYFTXcpttQSZmv1wDg7F3uH7o)

#### Materials

- :arrow_lower_right: [Download the data here!](https://drive.google.com/drive/folders/1JLzofrtaVQdo0PdUysNWjNsBdAaI21EJ?usp=sharing) :arrow_lower_left:
- :arrow_lower_right: [Access the slides here!](https://docs.google.com/presentation/d/1zMTsaAeoMX9ico13PVd7_tOffE8kUH-IOA5kCjSYIx8/edit?usp=sharing) :arrow_lower_left:

#### Software
- [Python/Anaconda](https://www.anaconda.com/download)
- [VSCode](https://code.visualstudio.com/download)
- [Python extension](https://marketplace.visualstudio.com/items?itemName=ms-python.python)
- [Jupyter extension](https://marketplace.visualstudio.com/items?itemName=ms-toolsai.jupyter)
- [SQLite extension](https://marketplace.visualstudio.com/items?itemName=alexcvzz.vscode-sqlite)
- [SQLTools SQLite extension](https://marketplace.visualstudio.com/items?itemName=mtxr.sqltools-driver-sqlite)

#### Setup

With the required tools installed, we can create our *environment* with Anaconda (conda):

```bash
conda create --name ds_points python=3
conda activate ds_points

pip install -r requirements.txt
```

## Challenge

During the course we trained a Random Forest model with grid search. This model produced the following metrics:

| Base | Accuracy | ROC AUC | Precision | Recall |
| :---: | :---: | :---: | :---: | :---: |
| **Train** | 0.819401 | 0.913987 | 0.770598 | 0.845745 |
| **Test** | 0.747634 | 0.817416 | 0.684848 | 0.801418 |
| **Oot** | 0.741602 | 0.814528 | 0.669291 | 0.594406 |

Use the data [from this link](https://docs.google.com/spreadsheets/d/1zcP7CKDcqEkhK2b_g27yGY226ZaX_kX4UxBsNQfM9RQ/edit?usp=sharing) to try to improve the model's performance on the Out of Time (oot) base.

Consider:

```python
# Oot dataframe: the most recent snapshot
df_oot = df[df['dtRef'] == df['dtRef'].max()]

# Training dataframe: all earlier snapshots
df_train = df[df['dtRef'] < df['dtRef'].max()]

target = 'flChurn'
features = df_train.columns[3:].tolist()
```
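One possible starting point (a sketch, not the course's official solution): keep the split above, tune a Random Forest with a cross-validated grid search, and compare the ROC AUC on each base. Here `df` is assumed to be the table loaded from the link above, and the parameter grid is only illustrative:

```python
from sklearn import ensemble, metrics, model_selection

X_train, X_test, y_train, y_test = model_selection.train_test_split(
    df_train[features], df_train[target], test_size=0.2, random_state=42)

grid = model_selection.GridSearchCV(
    ensemble.RandomForestClassifier(random_state=42),
    param_grid={"n_estimators": [200, 500],
                "min_samples_leaf": [10, 25, 50]},
    scoring="roc_auc",
    cv=3,
    n_jobs=-1,
)
grid.fit(X_train, y_train)

# The goal is to close the gap between the train and oot scores
for name, (X, y) in {"train": (X_train, y_train),
                     "test": (X_test, y_test),
                     "oot": (df_oot[features], df_oot[target])}.items():
    proba = grid.predict_proba(X)[:, 1]
    print(name, metrics.roc_auc_score(y, proba))
```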
## Support this initiative!

We provide data education for free, so every bit of support matters. Check out the different ways to support us:

- 💵 Pix key: pix@teomewhy.org
- 💶 LivePix: [livepix.gg/teomewhy](https://livepix.gg/teomewhy)
- 💷 GitHub Sponsors: [github.com/sponsors/TeoMeWhy](https://github.com/sponsors/TeoMeWhy)
- 💴 ApoiaSe: [apoia.se/teomewhy](https://apoia.se/teomewhy)
- 🎥 YouTube Member: [youtube.com/@teomewhy/membership](https://www.youtube.com/@teomewhy/membership)
- 🎮 Twitch Sub: [twitch.tv/teomewhy](https://www.twitch.tv/teomewhy)
- 💌 Newsletter: [teomewhy.substack.com](https://teomewhy.substack.com/)
--------------------------------------------------------------------------------
/data/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeoMeWhy/ds-points/055bcc014bf896757ee412951296dc3e1bd37aba/data/.gitkeep
--------------------------------------------------------------------------------
/data/query.sql:
--------------------------------------------------------------------------------
SELECT *
FROM customer_profile
--------------------------------------------------------------------------------
/models/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeoMeWhy/ds-points/055bcc014bf896757ee412951296dc3e1bd37aba/models/.gitkeep
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
feature_engine==1.8.0
pandas==2.2.2
scikit_learn==1.5.0
SQLAlchemy==2.0.30
tqdm==4.66.4
mlflow==2.13.2
openpyxl==3.1.4
--------------------------------------------------------------------------------
/src/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeoMeWhy/ds-points/055bcc014bf896757ee412951296dc3e1bd37aba/src/.gitkeep
--------------------------------------------------------------------------------
/src/app/api.go:
--------------------------------------------------------------------------------
package main

import (
	"app/db"
	"net/http"

	"github.com/gin-gonic/gin"
)

// Shared connection to the feature store, opened once at startup.
var con, _ = db.Connect()

func getUserProfile(c *gin.Context) {
	id := c.Param("id")

	profile, err := db.GetUser(id, con)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "internal error while looking up the user"})
		return
	}

	if profile["idCustomer"] == "" {
		c.JSON(http.StatusNotFound, gin.H{"error": "user not found"})
		return
	}

	c.JSON(http.StatusOK, profile)
}

func main() {

	router := gin.Default()

	router.GET("/profile/:id", getUserProfile)

	router.Run("localhost:8082")
}
--------------------------------------------------------------------------------
/src/app/db/db.go:
--------------------------------------------------------------------------------
package db

import (
	"database/sql"

	_ "github.com/mattn/go-sqlite3"
)

func Connect() (*sql.DB, error) {

	con, err := sql.Open("sqlite3", "../../data/feature_store.db")
	if err != nil {
		return nil, err
	}

	return con, nil
}

func GetUser(id string, con *sql.DB) (map[string]string, error) {

	query := `
	SELECT
		dtRef AS dtRef,
		idCustomer AS idCustomer,
		prob_churn AS probChurn,
		cluster_recencia AS cicloVida,
		cluster_fv AS clusterRF,
		dtUpdate AS dtUpdate

	FROM customer_profile

	WHERE idCustomer = ?
	AND dtRef = (SELECT MAX(dtRef) FROM customer_profile)
	`

	state, err := con.Prepare(query)
	if err != nil {
		return nil, err
	}
	defer state.Close()

	rows, err := state.Query(id)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var dtRef, idCustomer, probChurn, cicloVida, clusterRF, dtUpdate string
	for rows.Next() {
		if err := rows.Scan(&dtRef, &idCustomer, &probChurn, &cicloVida, &clusterRF, &dtUpdate); err != nil {
			return nil, err
		}
	}

	values := map[string]string{
		"dtRef":      dtRef,
		"idCustomer": idCustomer,
		"probChurn":  probChurn,
		"cicloVida":  cicloVida,
		"clusterRF":  clusterRF,
		"dtUpdate":   dtUpdate,
	}

	return values, nil
}
--------------------------------------------------------------------------------
/src/app/go.mod:
--------------------------------------------------------------------------------
module app

go 1.22.2

require (
	github.com/gin-gonic/gin v1.10.0
	github.com/mattn/go-sqlite3 v1.14.22
)

require (
	github.com/bytedance/sonic v1.11.6 // indirect
	github.com/bytedance/sonic/loader v0.1.1 // indirect
	github.com/cloudwego/base64x v0.1.4 // indirect
	github.com/cloudwego/iasm v0.2.0 // indirect
	github.com/gabriel-vasile/mimetype v1.4.3 // indirect
	github.com/gin-contrib/sse v0.1.0 // indirect
	github.com/go-playground/locales v0.14.1 // indirect
	github.com/go-playground/universal-translator v0.18.1 // indirect
	github.com/go-playground/validator/v10 v10.20.0 // indirect
	github.com/goccy/go-json v0.10.2 // indirect
	github.com/json-iterator/go v1.1.12 // indirect
	github.com/klauspost/cpuid/v2 v2.2.7 // indirect
	github.com/leodido/go-urn v1.4.0 // indirect
	github.com/mattn/go-isatty v0.0.20 // indirect
	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
	github.com/modern-go/reflect2 v1.0.2 // indirect
	github.com/pelletier/go-toml/v2 v2.2.2 // indirect
	github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
	github.com/ugorji/go/codec v1.2.12 // indirect
	golang.org/x/arch v0.8.0 // indirect
	golang.org/x/crypto v0.23.0 // indirect
	golang.org/x/net v0.25.0 // indirect
	golang.org/x/sys v0.20.0 // indirect
	golang.org/x/text v0.15.0 // indirect
	google.golang.org/protobuf v1.34.1 // indirect
	gopkg.in/yaml.v3 v3.0.1 // indirect
)
--------------------------------------------------------------------------------
/src/app/go.sum:
--------------------------------------------------------------------------------
1 | github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0= 2 | github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4= 3 | github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM= 4 | github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU= 5 | github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y= 6 | github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w= 7 | github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg= 8 | github.com/cloudwego/iasm v0.2.0/go.mod
h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY= 9 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 10 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 11 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 12 | github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0= 13 | github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk= 14 | github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE= 15 | github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= 16 | github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU= 17 | github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y= 18 | github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= 19 | github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= 20 | github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= 21 | github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= 22 | github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= 23 | github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= 24 | github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBExVwjEviJTixqxL8= 25 | github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM= 26 | github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= 27 | github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= 28 | github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= 29 | github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 30 | github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 31 | github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= 32 | github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= 33 | github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= 34 | github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM= 35 | github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= 36 | github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M= 37 | github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ= 38 | github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI= 39 | github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= 40 | github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= 41 | github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU= 42 | github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= 43 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 44 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= 45 | github.com/modern-go/concurrent 
v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 46 | github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= 47 | github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= 48 | github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM= 49 | github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs= 50 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 51 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 52 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 53 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 54 | github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= 55 | github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= 56 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 57 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 58 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 59 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 60 | github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= 61 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 62 | github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= 63 | github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 64 | github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= 65 | github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= 66 | github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE= 67 | github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= 68 | golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= 69 | golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc= 70 | golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= 71 | golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI= 72 | golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= 73 | golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= 74 | golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= 75 | golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 76 | golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 77 | golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= 78 | golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 79 | golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= 80 | golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= 81 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= 82 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 83 | google.golang.org/protobuf v1.34.1 
h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg= 84 | google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= 85 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 86 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 87 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 88 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 89 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 90 | nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50= 91 | rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= 92 |
--------------------------------------------------------------------------------
/src/feature_store/exec.sh:
--------------------------------------------------------------------------------
python execute.py -f fs_general
python execute.py -f fs_horario
python execute.py -f fs_points
python execute.py -f fs_produtos
python execute.py -f fs_transacoes
--------------------------------------------------------------------------------
/src/feature_store/execute.py:
--------------------------------------------------------------------------------
# %%
import argparse
import datetime

import pandas as pd
import sqlalchemy
from sqlalchemy import exc

from tqdm import tqdm

def import_query(path):
    with open(path, 'r') as open_file:
        return open_file.read()


def date_range(start, stop):
    dt_start = datetime.datetime.strptime(start, '%Y-%m-%d')
    dt_stop = datetime.datetime.strptime(stop, '%Y-%m-%d')
    dates = []
    while dt_start <= dt_stop:
        dates.append(dt_start.strftime("%Y-%m-%d"))
        dt_start += datetime.timedelta(days=1)
    return dates


def ingest_date(query, table, dt):

    # Replace the '{date}' placeholder with an actual date, e.g. 2024-06-06
    query_fmt = query.format(date=dt)

    # Run the query against the origin database and bring the result into Python
    df = pd.read_sql(query_fmt, ORIGIN_ENGINE)

    # Delete any rows for this reference date, so re-runs do not duplicate data
    with TARGET_ENGINE.connect() as con:
        try:
            state = f"DELETE FROM {table} WHERE dtRef = '{dt}';"
            con.execute(sqlalchemy.text(state))
            con.commit()
        except exc.OperationalError:
            print("Table does not exist yet, creating it...")

    # Append the data to the target database
    df.to_sql(table, TARGET_ENGINE, index=False, if_exists='append')


# %%

now = datetime.datetime.now().strftime("%Y-%m-%d")

parser = argparse.ArgumentParser()
parser.add_argument("--feature_store", "-f", help="Feature store name", type=str)
parser.add_argument("--start", "-s", help="Start date", default=now, type=str)
parser.add_argument("--stop", "-p", help="Stop date", default=now, type=str)
args = parser.parse_args()

ORIGIN_ENGINE = sqlalchemy.create_engine("sqlite:///../../data/database.db")
TARGET_ENGINE = sqlalchemy.create_engine("sqlite:///../../data/feature_store.db")

# Load the query template for the requested feature store
query = import_query(f"{args.feature_store}.sql")
dates = date_range(args.start, args.stop)

for i in tqdm(dates):
    ingest_date(query, args.feature_store, i)
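# Usage examples (the dates below are hypothetical):
#
#   python execute.py -f fs_general                              # ingest today's snapshot
#   python execute.py -f fs_points -s 2024-05-01 -p 2024-05-31   # backfill a date range
#
# Re-running a date is safe: ingest_date deletes the dtRef partition in the
# target table before appending, so no duplicated rows are created.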
-------------------------------------------------------------------------------- /src/feature_store/fs_general.sql: -------------------------------------------------------------------------------- 1 | WITH tb_rfv AS ( 2 | 3 | SELECT 4 | idCustomer, 5 | 6 | CAST(min(julianday('{date}') - julianday(dtTransaction)) 7 | AS INTEGER) + 1 AS recenciaDias, 8 | 9 | COUNT(DISTINCT DATE(dtTransaction)) AS frequenciaDias, 10 | 11 | SUM(CASE 12 | WHEN pointsTransaction > 0 THEN pointsTransaction 13 | END) AS valorPoints 14 | 15 | FROM transactions 16 | 17 | WHERE dtTransaction < '{date}' 18 | AND dtTransaction >= DATE('{date}', '-21 day') 19 | 20 | GROUP BY idCustomer 21 | ), 22 | 23 | tb_idade AS ( 24 | 25 | SELECT 26 | 27 | t1.idCustomer, 28 | 29 | CAST(MAX(julianday('{date}') - julianday(t2.dtTransaction)) 30 | AS INTEGER) + 1 AS idadeBaseDias 31 | 32 | FROM tb_rfv AS t1 33 | 34 | LEFT JOIN transactions AS t2 35 | ON t1.idCustomer = t2.idCustomer 36 | 37 | GROUP BY t2.idCustomer 38 | 39 | ) 40 | 41 | SELECT 42 | '{date}' AS dtRef, 43 | t1.*, 44 | t2.idadeBaseDias, 45 | t3.flEmail 46 | 47 | FROM tb_rfv AS t1 48 | 49 | LEFT JOIN tb_idade AS t2 50 | ON t1.idCustomer = t2.idCustomer 51 | 52 | LEFT JOIN customers AS t3 53 | ON t1.idCustomer = t3.idCustomer -------------------------------------------------------------------------------- /src/feature_store/fs_horario.sql: -------------------------------------------------------------------------------- 1 | WITH tb_transactions_hour AS ( 2 | 3 | SELECT idCustomer, 4 | pointsTransaction, 5 | CAST(STRFTIME('%H', DATETIME(dtTransaction, '-3 hour')) AS INTEGER) AS hour 6 | 7 | FROM transactions 8 | 9 | WHERE dtTransaction < '{date}' 10 | AND dtTransaction >= DATE('{date}', '-21 day') 11 | 12 | ), 13 | 14 | tb_share AS ( 15 | 16 | SELECT idCustomer, 17 | SUM(CASE WHEN hour >= 8 and hour < 12 THEN abs(pointsTransaction) ELSE 0 END) AS qtdPointsManha, 18 | SUM(CASE WHEN hour >= 12 and hour < 18 THEN abs(pointsTransaction) ELSE 0 END) AS qtdPointsTarde, 19 | SUM(CASE WHEN hour >= 18 and hour <= 23 THEN abs(pointsTransaction) ELSE 0 END) AS qtdPointsNoite, 20 | 21 | 1.0 * SUM(CASE WHEN hour >= 8 and hour < 12 THEN abs(pointsTransaction) ELSE 0 END) / SUM(abs(pointsTransaction)) AS pctPointsManha, 22 | 1.0 * SUM(CASE WHEN hour >= 12 and hour < 18 THEN abs(pointsTransaction) ELSE 0 END) / SUM(abs(pointsTransaction)) AS pctPointsTarde, 23 | 1.0 * SUM(CASE WHEN hour >= 18 and hour <= 23 THEN abs(pointsTransaction) ELSE 0 END) / SUM(abs(pointsTransaction)) AS pctPointsNoite, 24 | 25 | SUM(CASE WHEN hour >= 8 and hour < 12 THEN 1 ELSE 0 END) AS qtdTransacoesManha, 26 | SUM(CASE WHEN hour >= 12 and hour < 18 THEN 1 ELSE 0 END) AS qtdTransacoesTarde, 27 | SUM(CASE WHEN hour >= 18 and hour <= 23 THEN 1 ELSE 0 END) AS qtdTransacoesNoite, 28 | 29 | 1.0 * SUM(CASE WHEN hour >= 8 and hour < 12 THEN 1 ELSE 0 END) / SUM(1) AS pctTransacoesManha, 30 | 1.0 * SUM(CASE WHEN hour >= 12 and hour < 18 THEN 1 ELSE 0 END) / SUM(1) AS pctTransacoesTarde, 31 | 1.0 * SUM(CASE WHEN hour >= 18 and hour <= 23 THEN 1 ELSE 0 END) / SUM(1) AS pctTransacoesNoite 32 | 33 | FROM tb_transactions_hour 34 | 35 | GROUP BY idCustomer 36 | 37 | ) 38 | 39 | SELECT 40 | '{date}' AS dtRef, 41 | * 42 | 43 | FROM tb_share -------------------------------------------------------------------------------- /src/feature_store/fs_points.sql: -------------------------------------------------------------------------------- 1 | WITH tb_pontos_d AS ( 2 | 3 | SELECT idCustomer, 4 | 5 | SUM(pointsTransaction) AS 
saldoPointsD21, 6 | 7 | SUM(CASE WHEN dtTransaction >= DATE('{date}', '-14 day') 8 | THEN pointsTransaction 9 | ELSE 0 10 | END) AS saldoPointsD14, 11 | 12 | SUM(CASE WHEN dtTransaction >= DATE('{date}', '-7 day') 13 | THEN pointsTransaction 14 | ELSE 0 15 | END) AS saldoPointsD7, 16 | 17 | 18 | SUM(CASE WHEN pointsTransaction > 0 19 | THEN pointsTransaction 20 | ELSE 0 21 | END) AS pointsAcumuladosD21, 22 | 23 | SUM(CASE WHEN pointsTransaction > 0 24 | AND dtTransaction >= DATE('{date}', '-14 day') 25 | THEN pointsTransaction 26 | ELSE 0 27 | END) AS pointsAcumuladosD14, 28 | 29 | SUM(CASE WHEN pointsTransaction > 0 30 | AND dtTransaction >= DATE('{date}', '-7 day') 31 | THEN pointsTransaction 32 | ELSE 0 33 | END) AS pointsAcumuladosD7, 34 | 35 | 36 | SUM(CASE WHEN pointsTransaction < 0 37 | THEN pointsTransaction 38 | ELSE 0 39 | END) AS pointsResgatadosD21, 40 | 41 | SUM(CASE WHEN pointsTransaction < 0 42 | AND dtTransaction >= DATE('{date}', '-14 day') 43 | THEN pointsTransaction 44 | ELSE 0 45 | END) AS pointsResgatadosD14, 46 | 47 | SUM(CASE WHEN pointsTransaction < 0 48 | AND dtTransaction >= DATE('{date}', '-7 day') 49 | THEN pointsTransaction 50 | ELSE 0 51 | END) AS pointsResgatadosD7 52 | 53 | 54 | FROM transactions 55 | 56 | WHERE dtTransaction < '{date}' 57 | AND dtTransaction >= DATE('{date}', '-21 day') 58 | 59 | GROUP BY idCustomer 60 | 61 | ), 62 | 63 | tb_vida AS ( 64 | 65 | SELECT t1.idCustomer, 66 | SUM(t2.pointsTransaction) AS saldoPoints, 67 | SUM(CASE 68 | WHEN t2.pointsTransaction > 0 69 | THEN t2.pointsTransaction 70 | ELSE 0 71 | END) AS pointsAcumuladosVida, 72 | SUM(CASE 73 | WHEN t2.pointsTransaction < 0 74 | THEN t2.pointsTransaction 75 | ELSE 0 76 | END) AS pointsResgatadosVida, 77 | 78 | CAST(max(julianday('{date}') - julianday(dtTransaction)) AS INTEGER) + 1 AS diasVida 79 | 80 | FROM tb_pontos_d AS t1 81 | 82 | LEFT JOIN transactions AS t2 83 | ON t1.idCustomer = t2.idCustomer 84 | 85 | WHERE t2.dtTransaction < '{date}' 86 | 87 | GROUP BY t1.idCustomer 88 | 89 | ), 90 | 91 | tb_join AS ( 92 | 93 | SELECT 94 | t1.*, 95 | t2.saldoPoints, 96 | t2.pointsAcumuladosVida, 97 | t2.pointsResgatadosVida, 98 | 1.0 * t2.pointsAcumuladosVida / t2.diasVida AS pointsPorDia 99 | 100 | FROM tb_pontos_d As t1 101 | 102 | LEFT JOIN tb_vida AS t2 103 | ON t1.idCustomer = t2.idCustomer 104 | 105 | ) 106 | 107 | 108 | SELECT 109 | '{date}' AS dtRef, 110 | * 111 | FROM tb_join -------------------------------------------------------------------------------- /src/feature_store/fs_produtos.sql: -------------------------------------------------------------------------------- 1 | WITH tb_transactions_products AS ( 2 | 3 | SELECT t1.*, 4 | t2.NameProduct, 5 | t2.QuantityProduct 6 | 7 | FROM transactions AS t1 8 | 9 | LEFT JOIN transactions_product AS t2 10 | ON t1.idTransaction = t2.idTransaction 11 | 12 | WHERE t1.dtTransaction < '{date}' 13 | AND t1.dtTransaction >= DATE('{date}', '-21 day') 14 | 15 | ), 16 | 17 | tb_share AS ( 18 | 19 | SELECT 20 | 21 | idCustomer, 22 | 23 | SUM(CASE WHEN NameProduct = 'ChatMessage' THEN QuantityProduct ELSE 0 END) AS qtdeChatMessage, 24 | SUM(CASE WHEN NameProduct = 'Lista de presença' THEN QuantityProduct ELSE 0 END) AS qtdeListaPresença, 25 | SUM(CASE WHEN NameProduct = 'Resgatar Ponei' THEN QuantityProduct ELSE 0 END) AS qtdeResgatarPonei, 26 | SUM(CASE WHEN NameProduct = 'Troca de Pontos StreamElements' THEN QuantityProduct ELSE 0 END) AS qtdeTrocaPontos, 27 | SUM(CASE WHEN NameProduct = 'Presença Streak' THEN QuantityProduct ELSE 0 END) AS 
qtdePresençaStreak, 28 | SUM(CASE WHEN NameProduct = 'Airflow Lover' THEN QuantityProduct ELSE 0 END) AS qtdeAirflowLover, 29 | SUM(CASE WHEN NameProduct = 'R Lover' THEN QuantityProduct ELSE 0 END) AS qtdeRLover, 30 | 31 | SUM(CASE WHEN NameProduct = 'ChatMessage' THEN pointsTransaction ELSE 0 END) AS pointsChatMessage, 32 | SUM(CASE WHEN NameProduct = 'Lista de presença' THEN pointsTransaction ELSE 0 END) AS pointsListaPresença, 33 | SUM(CASE WHEN NameProduct = 'Resgatar Ponei' THEN pointsTransaction ELSE 0 END) AS pointsResgatarPonei, 34 | SUM(CASE WHEN NameProduct = 'Troca de Pontos StreamElements' THEN pointsTransaction ELSE 0 END) AS pointsTrocaPontos, 35 | SUM(CASE WHEN NameProduct = 'Presença Streak' THEN pointsTransaction ELSE 0 END) AS pointsPresençaStreak, 36 | SUM(CASE WHEN NameProduct = 'Airflow Lover' THEN pointsTransaction ELSE 0 END) AS pointsAirflowLover, 37 | SUM(CASE WHEN NameProduct = 'R Lover' THEN pointsTransaction ELSE 0 END) AS pointsRLover, 38 | 39 | 1.0 * SUM(CASE WHEN NameProduct = 'ChatMessage' THEN QuantityProduct ELSE 0 END) / SUM(QuantityProduct) AS pctChatMessage, 40 | 1.0 * SUM(CASE WHEN NameProduct = 'Lista de presença' THEN QuantityProduct ELSE 0 END) / SUM(QuantityProduct) AS pctListaPresença, 41 | 1.0 * SUM(CASE WHEN NameProduct = 'Resgatar Ponei' THEN QuantityProduct ELSE 0 END) / SUM(QuantityProduct) AS pctResgatarPonei, 42 | 1.0 * SUM(CASE WHEN NameProduct = 'Troca de Pontos StreamElements' THEN QuantityProduct ELSE 0 END) / SUM(QuantityProduct) AS pctTrocaPontos, 43 | 1.0 * SUM(CASE WHEN NameProduct = 'Presença Streak' THEN QuantityProduct ELSE 0 END) / SUM(QuantityProduct) AS pctPresençaStreak, 44 | 1.0 * SUM(CASE WHEN NameProduct = 'Airflow Lover' THEN QuantityProduct ELSE 0 END) / SUM(QuantityProduct) AS pctAirflowLover, 45 | 1.0 * SUM(CASE WHEN NameProduct = 'R Lover' THEN QuantityProduct ELSE 0 END) / SUM(QuantityProduct) AS pctRLover, 46 | 47 | 1.0 * SUM(CASE WHEN NameProduct = 'ChatMessage' THEN QuantityProduct ELSE 0 END) / COUNT(DISTINCT DATE(dtTransaction)) AS avgChatLive 48 | 49 | FROM tb_transactions_products 50 | 51 | GROUP BY idCustomer 52 | 53 | ), 54 | 55 | tb_group AS ( 56 | 57 | SELECT idCustomer, 58 | NameProduct, 59 | sum(QuantityProduct) AS qtde, 60 | sum(pointsTransaction) AS points 61 | 62 | FROM tb_transactions_products 63 | GROUP BY idCustomer, NameProduct 64 | 65 | ), 66 | 67 | tb_rn AS ( 68 | 69 | SELECT *, 70 | ROW_NUMBER() OVER (PARTITION BY idCustomer ORDER BY qtde DESC, points DESC) AS rnQtde 71 | 72 | from tb_group 73 | order by idCustomer 74 | 75 | ), 76 | 77 | tb_produto_max AS ( 78 | 79 | SELECT * 80 | FROM tb_rn 81 | WHERE rnQtde = 1 82 | 83 | ) 84 | 85 | SELECT 86 | '{date}' AS dtRef, 87 | t1.*, 88 | t2.NameProduct AS productMaxQtde 89 | 90 | FROM tb_share AS t1 91 | 92 | LEFT JOIN tb_produto_max AS t2 93 | ON t1.idCustomer = t2.idCustomer 94 | 95 | -------------------------------------------------------------------------------- /src/feature_store/fs_transacoes.sql: -------------------------------------------------------------------------------- 1 | WITH tb_transactions AS ( 2 | 3 | SELECT * 4 | FROM transactions 5 | WHERE dtTransaction < '{date}' 6 | AND dtTransaction >= DATE('{date}', '-21 day') 7 | 8 | ), 9 | 10 | tb_freq AS ( 11 | 12 | SELECT 13 | idCustomer, 14 | count(distinct date(dtTransaction)) AS qtdeDiasD21, 15 | count(distinct CASE WHEN dtTransaction > date('{date}', '-14 day') THEN date(dtTransaction) END) AS qtdeDiasD14, 16 | count(distinct CASE WHEN dtTransaction > date('{date}', '-7 day') THEN 
date(dtTransaction) END) AS qtdeDiasD7 17 | 18 | FROM tb_transactions 19 | 20 | GROUP BY idCustomer 21 | ), 22 | 23 | tb_live_minutes AS ( 24 | 25 | SELECT idCustomer, 26 | date(datetime(dtTransaction, '-3 hour')) AS dtTransactionDate, 27 | min(datetime(dtTransaction, '-3 hour')) AS dtInicio, 28 | max(datetime(dtTransaction, '-3 hour')) AS dtFim, 29 | (julianday(max(datetime(dtTransaction, '-3 hour'))) - 30 | julianday(min(datetime(dtTransaction, '-3 hour')))) * 24 * 60 AS liveMinutes 31 | 32 | FROM tb_transactions 33 | 34 | GROUP BY 1,2 35 | 36 | ), 37 | 38 | tb_hours AS ( 39 | 40 | SELECT idCustomer, 41 | AVG(liveMinutes) AS avgLiveMinutes, 42 | SUM(liveMinutes) AS sumLiveMinutes, 43 | MIN(liveMinutes) AS minLiveMinutes, 44 | MAX(liveMinutes) AS maxLiveMinutes 45 | FROM tb_live_minutes 46 | GROUP BY idCustomer 47 | ), 48 | 49 | tb_vida AS ( 50 | 51 | SELECT idCustomer, 52 | COUNT(DISTINCT idTransaction) AS qtdeTransacaoVida, 53 | COUNT(DISTINCT idTransaction) / (max(julianday('{date}') - julianday(dtTransaction))) AS avgTransacaoDia 54 | 55 | FROM transactions 56 | WHERE dtTransaction < '{date}' 57 | GROUP BY idCustomer 58 | 59 | ), 60 | 61 | tb_join AS ( 62 | 63 | SELECT t1.*, 64 | t2.avgLiveMinutes, 65 | t2.sumLiveMinutes, 66 | t2.minLiveMinutes, 67 | t2.maxLiveMinutes, 68 | t3.qtdeTransacaoVida, 69 | t3.avgTransacaoDia 70 | 71 | FROM tb_freq AS t1 72 | 73 | LEFT JOIN tb_hours AS t2 74 | ON t1.idCustomer = t2.idCustomer 75 | 76 | LEFT JOIN tb_vida AS t3 77 | ON t3.idCustomer = t1.idCustomer 78 | ) 79 | 80 | SELECT 81 | '{date}' AS dtRef, 82 | * 83 | 84 | FROM tb_join -------------------------------------------------------------------------------- /src/pipeline.sh: -------------------------------------------------------------------------------- 1 | cd feature_store 2 | bash exec.sh 3 | cd ../predict 4 | python profile_user.py -------------------------------------------------------------------------------- /src/predict/etl.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | t1.dtRef, 3 | t1.idCustomer, 4 | t1.recenciaDias, 5 | t1.frequenciaDias, 6 | t1.valorPoints, 7 | t1.idadeBaseDias, 8 | t1.flEmail, 9 | t3.qtdPointsManha, 10 | t3.qtdPointsTarde, 11 | t3.qtdPointsNoite, 12 | t3.pctPointsManha, 13 | t3.pctPointsTarde, 14 | t3.pctPointsNoite, 15 | t3.qtdTransacoesManha, 16 | t3.qtdTransacoesTarde, 17 | t3.qtdTransacoesNoite, 18 | t3.pctTransacoesManha, 19 | t3.pctTransacoesTarde, 20 | t3.pctTransacoesNoite, 21 | t4.saldoPointsD21, 22 | t4.saldoPointsD14, 23 | t4.saldoPointsD7, 24 | t4.pointsAcumuladosD21, 25 | t4.pointsAcumuladosD14, 26 | t4.pointsAcumuladosD7, 27 | t4.pointsResgatadosD21, 28 | t4.pointsResgatadosD14, 29 | t4.pointsResgatadosD7, 30 | t4.saldoPoints, 31 | t4.pointsAcumuladosVida, 32 | t4.pointsResgatadosVida, 33 | t4.pointsPorDia, 34 | t5.qtdeChatMessage, 35 | t5.qtdeListaPresença, 36 | t5.qtdeResgatarPonei, 37 | t5.qtdeTrocaPontos, 38 | t5.qtdePresençaStreak, 39 | t5.qtdeAirflowLover, 40 | t5.qtdeRLover, 41 | t5.pointsChatMessage, 42 | t5.pointsListaPresença, 43 | t5.pointsResgatarPonei, 44 | t5.pointsTrocaPontos, 45 | t5.pointsPresençaStreak, 46 | t5.pointsAirflowLover, 47 | t5.pointsRLover, 48 | t5.pctChatMessage, 49 | t5.pctListaPresença, 50 | t5.pctResgatarPonei, 51 | t5.pctTrocaPontos, 52 | t5.pctPresençaStreak, 53 | t5.pctAirflowLover, 54 | t5.pctRLover, 55 | t5.avgChatLive, 56 | t5.productMaxQtde, 57 | t6.qtdeDiasD21, 58 | t6.qtdeDiasD14, 59 | t6.qtdeDiasD7, 60 | t6.avgLiveMinutes, 61 | t6.sumLiveMinutes, 62 | 
/src/predict/etl.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | t1.dtRef,
3 | t1.idCustomer,
4 | t1.recenciaDias,
5 | t1.frequenciaDias,
6 | t1.valorPoints,
7 | t1.idadeBaseDias,
8 | t1.flEmail,
9 | t3.qtdPointsManha,
10 | t3.qtdPointsTarde,
11 | t3.qtdPointsNoite,
12 | t3.pctPointsManha,
13 | t3.pctPointsTarde,
14 | t3.pctPointsNoite,
15 | t3.qtdTransacoesManha,
16 | t3.qtdTransacoesTarde,
17 | t3.qtdTransacoesNoite,
18 | t3.pctTransacoesManha,
19 | t3.pctTransacoesTarde,
20 | t3.pctTransacoesNoite,
21 | t4.saldoPointsD21,
22 | t4.saldoPointsD14,
23 | t4.saldoPointsD7,
24 | t4.pointsAcumuladosD21,
25 | t4.pointsAcumuladosD14,
26 | t4.pointsAcumuladosD7,
27 | t4.pointsResgatadosD21,
28 | t4.pointsResgatadosD14,
29 | t4.pointsResgatadosD7,
30 | t4.saldoPoints,
31 | t4.pointsAcumuladosVida,
32 | t4.pointsResgatadosVida,
33 | t4.pointsPorDia,
34 | t5.qtdeChatMessage,
35 | t5.qtdeListaPresença,
36 | t5.qtdeResgatarPonei,
37 | t5.qtdeTrocaPontos,
38 | t5.qtdePresençaStreak,
39 | t5.qtdeAirflowLover,
40 | t5.qtdeRLover,
41 | t5.pointsChatMessage,
42 | t5.pointsListaPresença,
43 | t5.pointsResgatarPonei,
44 | t5.pointsTrocaPontos,
45 | t5.pointsPresençaStreak,
46 | t5.pointsAirflowLover,
47 | t5.pointsRLover,
48 | t5.pctChatMessage,
49 | t5.pctListaPresença,
50 | t5.pctResgatarPonei,
51 | t5.pctTrocaPontos,
52 | t5.pctPresençaStreak,
53 | t5.pctAirflowLover,
54 | t5.pctRLover,
55 | t5.avgChatLive,
56 | t5.productMaxQtde,
57 | t6.qtdeDiasD21,
58 | t6.qtdeDiasD14,
59 | t6.qtdeDiasD7,
60 | t6.avgLiveMinutes,
61 | t6.sumLiveMinutes,
62 | t6.minLiveMinutes,
63 | t6.maxLiveMinutes,
64 | t6.qtdeTransacaoVida,
65 | t6.avgTransacaoDia
66 | 
67 | FROM fs_general AS t1
68 | 
69 | LEFT JOIN fs_horario AS t3
70 | ON t1.idCustomer = t3.idCustomer
71 | AND t1.dtRef = t3.dtRef
72 | 
73 | LEFT JOIN fs_points AS t4
74 | ON t1.idCustomer = t4.idCustomer
75 | AND t1.dtRef = t4.dtRef
76 | 
77 | LEFT JOIN fs_produtos AS t5
78 | ON t1.idCustomer = t5.idCustomer
79 | AND t1.dtRef = t5.dtRef
80 | 
81 | LEFT JOIN fs_transacoes AS t6
82 | ON t1.idCustomer = t6.idCustomer
83 | AND t1.dtRef = t6.dtRef
84 | 
85 | WHERE t1.dtRef = DATE('now')
--------------------------------------------------------------------------------
/src/predict/predict.py:
--------------------------------------------------------------------------------
1 | # %%
2 | import pandas as pd
3 | import sqlalchemy
4 | from sqlalchemy import exc
5 | 
6 | import mlflow
7 | import mlflow.sklearn
8 | 
9 | import json
10 | 
11 | print("Model scoring script started!")
12 | 
13 | print("Loading model...")
14 | mlflow.set_tracking_uri("http://192.168.1.100:8081")
15 | model = mlflow.sklearn.load_model("models:/Churn-Teo-Me-Why/production")
16 | 
17 | # %%
18 | print("Loading the model's features...")
19 | model_info = mlflow.models.get_model_info("models:/Churn-Teo-Me-Why/production")
20 | features = [i['name'] for i in json.loads(model_info.signature_dict['inputs'])]
21 | features
22 | 
23 | # %%
24 | print("Loading the scoring base...")
25 | engine = sqlalchemy.create_engine("sqlite:///../../data/feature_store.db")
26 | with open("etl.sql", 'r') as open_file:
27 |     query = open_file.read()
28 | 
29 | df = pd.read_sql(query, engine)
30 | 
31 | # %%
32 | print("Running predictions...")
33 | pred = model.predict_proba(df[features])
34 | proba_churn = pred[:,1]
35 | 
36 | # %%
37 | 
38 | print("Persisting data...")
39 | df_predict = df[['dtRef', 'idCustomer']].copy()
40 | df_predict['probaChurn'] = proba_churn.copy()
41 | df_predict = (df_predict.sort_values("probaChurn", ascending=False)
42 |                         .reset_index(drop=True))
43 | 
44 | with engine.connect() as con:
45 |     state = f"DELETE FROM tb_churn WHERE dtRef = '{df_predict['dtRef'].min()}';"  # clear today's scores so re-runs don't duplicate rows
46 | 
47 |     try:
48 |         state = sqlalchemy.text(state)
49 |         con.execute(state)
50 |         con.commit()
51 |     except exc.OperationalError:
52 |         print("Table does not exist yet...")
53 | 
54 | df_predict.to_sql("tb_churn", engine, if_exists='append', index=False)
55 | 
56 | print("Done.")
--------------------------------------------------------------------------------
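The feature list above is rebuilt by hand from the raw signature JSON; MLflow's typed signature API can do the same lookup (a sketch, assuming the model was logged with an input signature, which the code above already relies on):

import mlflow

info = mlflow.models.get_model_info("models:/Churn-Teo-Me-Why/production")
features = info.signature.inputs.input_names()  # same list as the json.loads version
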
/src/predict/profile_user.py:
--------------------------------------------------------------------------------
1 | # %%
2 | 
3 | import pandas as pd
4 | import sqlalchemy
5 | import datetime
6 | 
7 | cluster_recencia = pd.read_pickle("../../models/cluster_recencia.pkl")
8 | cluster_fv = pd.read_pickle("../../models/cluster_fv.pkl")
9 | model_churn = pd.read_pickle("../../models/rf_2024_06_19.pkl")
10 | 
11 | # %%
12 | 
13 | engine = sqlalchemy.create_engine("sqlite:///../../data/feature_store.db")
14 | 
15 | with open("etl.sql", 'r') as open_file:
16 |     query = open_file.read()
17 | 
18 | df = pd.read_sql(query, engine)
19 | 
20 | # %%
21 | 
22 | df['prob_churn'] = model_churn['model'].predict_proba(df[model_churn['features']])[:,1]
23 | df['cluster_recencia'] = cluster_recencia['model'].predict(df[cluster_recencia['features']])
24 | df['cluster_fv'] = cluster_fv['model'].predict(df[cluster_fv['features']])
25 | 
26 | columns = ['dtRef', 'idCustomer', 'prob_churn', 'cluster_recencia', 'cluster_fv']
27 | 
28 | df_final = df[columns].copy()
29 | df_final['dtUpdate'] = datetime.datetime.now()
30 | 
31 | df_final.to_sql('customer_profile', engine, index=False, if_exists='replace')
32 | # %%
--------------------------------------------------------------------------------
/src/rfv/analise_freq_valor.py:
--------------------------------------------------------------------------------
1 | # %%
2 | import pandas as pd
3 | import sqlalchemy
4 | import matplotlib.pyplot as plt
5 | import seaborn as sns
6 | 
7 | from sklearn import cluster
8 | from sklearn import tree
9 | from sklearn import preprocessing
10 | 
11 | # %%
12 | engine = sqlalchemy.create_engine("sqlite:///../../data/feature_store.db")
13 | 
14 | query = '''
15 | 
16 | SELECT *
17 | FROM fs_general
18 | WHERE dtRef = (SELECT MAX(dtRef) FROM fs_general)
19 | 
20 | '''
21 | 
22 | df = pd.read_sql(query, engine)
23 | df
24 | # %%
25 | plt.figure(dpi=400)
26 | sns.set_theme(style="darkgrid")
27 | sns.scatterplot(
28 |     data=df,
29 |     x="valorPoints",
30 |     y="frequenciaDias",
31 | )
32 | 
33 | plt.title("Frequency vs Value")
34 | plt.show()
35 | # %%
36 | 
37 | 
38 | minmax = preprocessing.MinMaxScaler()
39 | 
40 | X_trans = minmax.fit_transform(df[['valorPoints','frequenciaDias']])
41 | 
42 | # cluster_method = cluster.KMeans(n_clusters=5)
43 | cluster_method = cluster.AgglomerativeClustering(linkage='ward', n_clusters=5)
44 | cluster_method.fit(X_trans)
45 | 
46 | df['cluster'] = cluster_method.labels_
47 | 
48 | plt.figure(dpi=400)
49 | 
50 | for i in df['cluster'].unique():
51 |     data = df[df['cluster']==i]
52 |     sns.scatterplot(
53 |         data=data,
54 |         x="valorPoints",
55 |         y="frequenciaDias",
56 |     )
57 | 
58 | plt.hlines(7.5, xmin=0, xmax=3000)   # visual guides for the manual thresholds used in rf_cluster below
59 | plt.hlines(3.5, xmin=0, xmax=3000)
60 | plt.hlines(10.5, xmin=0, xmax=3000)
61 | plt.vlines(500, ymin=0, ymax=18)
62 | plt.vlines(1500, ymin=0, ymax=18)    # note: rf_cluster below actually cuts the value axis at 1600
63 | 
64 | plt.show()
65 | df.groupby("cluster")['idCustomer'].count()
66 | 
67 | # %%
68 | 
69 | def rf_cluster(row):  # manual grid segmentation over points value and frequency of active days
70 | 
71 |     if (row['valorPoints'] < 500):
72 |         if (row['frequenciaDias'] < 3.5):
73 |             return "01-BB"
74 | 
75 |         elif (row['frequenciaDias'] < 7.5):
76 |             return "02-MB"
77 | 
78 |         elif (row['frequenciaDias'] < 10.5):
79 |             return "03-AB"
80 | 
81 |         else:
82 |             return "04-SB"
83 | 
84 |     elif (row['valorPoints'] < 1600):
85 |         if (row['frequenciaDias'] < 3.5):
86 |             return "05-BM"
87 | 
88 |         elif (row['frequenciaDias'] < 7.5):
89 |             return "06-MM"
90 | 
91 |         elif (row['frequenciaDias'] < 10.5):
92 |             return "07-AM"
93 | 
94 |         else:
95 |             return "08-SM"
96 | 
97 |     else:
98 |         if (row['frequenciaDias'] < 3.5):
99 |             return "09-BA"
100 | 
101 |         elif (row['frequenciaDias'] < 7.5):
102 |             return "10-MA"
103 | 
104 |         elif (row['frequenciaDias'] < 10.5):
105 |             return "11-AA"
106 | 
107 |         else:
108 |             return "12-SA"
109 | 
110 | df['cluster_rf'] = df.apply(rf_cluster, axis=1)
111 | 
112 | plt.figure(dpi=400)
113 | 
114 | for i in df['cluster_rf'].unique():
115 |     data = df[df['cluster_rf']==i]
116 |     sns.scatterplot(
117 |         data=data,
118 |         x="valorPoints",
119 |         y="frequenciaDias",
120 |     )
121 | 
122 | plt.title("Clusters: Frequency vs Value")
123 | plt.legend(df['cluster_rf'].unique())
124 | 
125 | # %%
126 | 
127 | clf = tree.DecisionTreeClassifier(random_state=42,
128 |                                   min_samples_leaf=1,
129 |                                   max_depth=None)
130 | 
131 | clf.fit(df[['frequenciaDias', 'valorPoints']], df['cluster_rf'])
132 | 
133 | model_freq_valor = pd.Series(
134 |     {"model": clf,
135 |      "features": ['frequenciaDias', 'valorPoints']}
136 | )
137 | 
138 | model_freq_valor.to_pickle("../../models/cluster_fv.pkl")
139 | 
140 | # %%
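The pickle written above is a pandas Series bundling the fitted tree and its feature list; downstream it is consumed like this (the same pattern src/predict/profile_user.py uses):

import pandas as pd

cluster_fv = pd.read_pickle("../../models/cluster_fv.pkl")
# labels = cluster_fv['model'].predict(df[cluster_fv['features']])
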
--------------------------------------------------------------------------------
/src/rfv/analise_recencia.py:
--------------------------------------------------------------------------------
1 | # %%
2 | 
3 | import pandas as pd
4 | import sqlalchemy
5 | 
6 | from sklearn import tree
7 | 
8 | import matplotlib.pyplot as plt
9 | 
10 | def ciclo_vida(row):  # lifecycle stage from base age and recency; labels are persisted with the model, so they stay as-is
11 | 
12 |     if row['idadeBaseDias'] <= 7:
13 |         return '01-Nova'
14 | 
15 |     elif row['recenciaDias'] <= 2:
16 |         return '02-Super Ativa'
17 | 
18 |     elif row['recenciaDias'] <= 6:
19 |         return '03-Ativa Comum'
20 | 
21 |     elif row['recenciaDias'] <= 12:
22 |         return '04-Ativa Fria'
23 | 
24 |     elif row['recenciaDias'] <= 18:
25 |         return '05-Desiludida'
26 | 
27 |     else:
28 |         return '06-Pre Churn'
29 | 
30 | # %%
31 | 
32 | # if __name__ == "__main__":
33 | 
34 | engine = sqlalchemy.create_engine("sqlite:///../../data/feature_store.db")
35 | 
36 | query = '''
37 | 
38 | SELECT *
39 | FROM fs_general
40 | WHERE dtRef = (SELECT MAX(dtRef) FROM fs_general)
41 | 
42 | '''
43 | 
44 | df = pd.read_sql(query, engine)
45 | 
46 | plt.figure(dpi=400)
47 | df["recenciaDias"].hist()
48 | plt.show()
49 | 
50 | df_recencia = df[["recenciaDias", 'idadeBaseDias']].sort_values(by="recenciaDias").reset_index(drop=True)
51 | df_recencia["unit"] = 1
52 | df_recencia['Acum'] = df_recencia['unit'].cumsum()
53 | df_recencia["Pct Acum"] = df_recencia['Acum'] / df_recencia['Acum'].max()
54 | 
55 | plt.plot(df_recencia["recenciaDias"], df_recencia["Pct Acum"], '-')
56 | plt.grid(True)
57 | plt.title("Cumulative Recency Distribution")
58 | plt.xlabel("Recency")
59 | plt.ylabel("Cum. Pct")
60 | 
61 | df_recencia['CicloVida'] = df_recencia.apply(ciclo_vida, axis=1)
62 | df_recencia.groupby(by=['CicloVida']).agg({
63 |     "recenciaDias": ['mean', 'count'],
64 |     "idadeBaseDias": ['mean'],
65 | })
66 | 
67 | 
68 | # %%
69 | 
70 | clf = tree.DecisionTreeClassifier(min_samples_leaf=1, max_depth=50, random_state=42)
71 | clf.fit(df_recencia[['recenciaDias', 'idadeBaseDias']], df_recencia['CicloVida'])
72 | model = pd.Series(
73 |     {
74 |         "model": clf,
75 |         "features": ['recenciaDias', 'idadeBaseDias']
76 |     }
77 | )
78 | 
79 | model.to_pickle("../../models/cluster_recencia.pkl")
80 | # %%
--------------------------------------------------------------------------------
/src/train/abt.sql:
--------------------------------------------------------------------------------
1 | WITH tb_fl_churn AS (
2 | 
3 | SELECT t1.dtRef,
4 | t1.idCustomer,
5 | CASE WHEN t2.idCustomer IS NULL THEN 1 ELSE 0 END AS flChurn
6 | 
7 | FROM fs_general AS t1
8 | 
9 | LEFT JOIN fs_general AS t2
10 | ON t1.idCustomer = t2.idCustomer
11 | AND t1.dtRef = DATE(t2.dtRef, '-21 day')
12 | 
13 | WHERE (t1.dtRef < DATE('2024-06-20', '-21 day') -- only dates that can still be labeled 21 days ahead
14 | AND strftime('%d', t1.dtRef) = '01')           -- one snapshot per month
15 | OR t1.dtRef = DATE('2024-06-20', '-21 day')    -- plus the most recent labelable date
16 | 
17 | ORDER BY 1, 2
18 | 
19 | )
20 | 
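-- How to read the flag built above: a customer with a snapshot at dtRef is
-- labeled flChurn = 1 when fs_general has no snapshot for them 21 days
-- later, i.e. they dropped out of the active base within three weeks.
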
21 | SELECT t1.*,
22 | 
23 | t2.recenciaDias,
24 | t2.frequenciaDias,
25 | t2.valorPoints,
26 | t2.idadeBaseDias,
27 | t2.flEmail,
28 | t3.qtdPointsManha,
29 | t3.qtdPointsTarde,
30 | t3.qtdPointsNoite,
31 | t3.pctPointsManha,
32 | t3.pctPointsTarde,
33 | t3.pctPointsNoite,
34 | t3.qtdTransacoesManha,
35 | t3.qtdTransacoesTarde,
36 | t3.qtdTransacoesNoite,
37 | t3.pctTransacoesManha,
38 | t3.pctTransacoesTarde,
39 | t3.pctTransacoesNoite,
40 | t4.saldoPointsD21,
41 | t4.saldoPointsD14,
42 | t4.saldoPointsD7,
43 | t4.pointsAcumuladosD21,
44 | t4.pointsAcumuladosD14,
45 | t4.pointsAcumuladosD7,
46 | t4.pointsResgatadosD21,
47 | t4.pointsResgatadosD14,
48 | t4.pointsResgatadosD7,
49 | t4.saldoPoints,
50 | t4.pointsAcumuladosVida,
51 | t4.pointsResgatadosVida,
52 | t4.pointsPorDia,
53 | t5.qtdeChatMessage,
54 | t5.qtdeListaPresença,
55 | t5.qtdeResgatarPonei,
56 | t5.qtdeTrocaPontos,
57 | t5.qtdePresençaStreak,
58 | t5.qtdeAirflowLover,
59 | t5.qtdeRLover,
60 | t5.pointsChatMessage,
61 | t5.pointsListaPresença,
62 | t5.pointsResgatarPonei,
63 | t5.pointsTrocaPontos,
64 | t5.pointsPresençaStreak,
65 | t5.pointsAirflowLover,
66 | t5.pointsRLover,
67 | t5.pctChatMessage,
68 | t5.pctListaPresença,
69 | t5.pctResgatarPonei,
70 | t5.pctTrocaPontos,
71 | t5.pctPresençaStreak,
72 | t5.pctAirflowLover,
73 | t5.pctRLover,
74 | t5.avgChatLive,
75 | t5.productMaxQtde,
76 | t6.qtdeDiasD21,
77 | t6.qtdeDiasD14,
78 | t6.qtdeDiasD7,
79 | t6.avgLiveMinutes,
80 | t6.sumLiveMinutes,
81 | t6.minLiveMinutes,
82 | t6.maxLiveMinutes,
83 | t6.qtdeTransacaoVida,
84 | t6.avgTransacaoDia
85 | 
86 | FROM tb_fl_churn AS t1
87 | 
88 | LEFT JOIN fs_general AS t2
89 | ON t1.idCustomer = t2.idCustomer
90 | AND t1.dtRef = t2.dtRef
91 | 
92 | LEFT JOIN fs_horario AS t3
93 | ON t1.idCustomer = t3.idCustomer
94 | AND t1.dtRef = t3.dtRef
95 | 
96 | LEFT JOIN fs_points AS t4
97 | ON t1.idCustomer = t4.idCustomer
98 | AND t1.dtRef = t4.dtRef
99 | 
100 | LEFT JOIN fs_produtos AS t5
101 | ON t1.idCustomer = t5.idCustomer
102 | AND t1.dtRef = t5.dtRef
103 | 
104 | LEFT JOIN fs_transacoes AS t6
105 | ON t1.idCustomer = t6.idCustomer
106 | AND t1.dtRef = t6.dtRef
--------------------------------------------------------------------------------
/src/train/export_abt.py:
--------------------------------------------------------------------------------
1 | # %%
2 | 
3 | import pandas as pd
4 | import sqlalchemy
5 | 
6 | from sklearn import model_selection
7 | 
8 | engine = sqlalchemy.create_engine("sqlite:///../../data/feature_store.db")
9 | 
10 | # %%
11 | with open("abt.sql", 'r') as open_file:
12 |     query = open_file.read()
13 | 
14 | df = pd.read_sql(query, engine)
15 | 
16 | oot = df[df['dtRef'] == df['dtRef'].max()].copy()
17 | df_train = df[df['dtRef'] < df['dtRef'].max()].copy()
18 | 
19 | train, test = model_selection.train_test_split(df_train,
20 |                                                random_state=42,
21 |                                                stratify=df_train['flChurn'])  # default test_size (25%) applies here
22 | 
23 | train['partition_set_name'] = 'train'
24 | test['partition_set_name'] = 'test'
25 | oot['partition_set_name'] = 'oot'
26 | 
27 | # %%
28 | 
29 | df_full = pd.concat([train, test, oot], axis=0, ignore_index=True)
30 | df_full.to_csv("../../data/abt_churn_20240620.csv", index=False, sep=";")
--------------------------------------------------------------------------------
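Reading the exported ABT back for training elsewhere (a short sketch; note the ';' separator and the partition_set_name column created above):

import pandas as pd

abt = pd.read_csv("../../data/abt_churn_20240620.csv", sep=";")
train = abt[abt["partition_set_name"] == "train"]
test = abt[abt["partition_set_name"] == "test"]
oot = abt[abt["partition_set_name"] == "oot"]
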
/src/train/semma_ex.py:
--------------------------------------------------------------------------------
1 | # %%
2 | 
3 | import pandas as pd
4 | 
5 | from sklearn import model_selection
6 | from sklearn import ensemble
7 | from sklearn import pipeline
8 | from sklearn import metrics
9 | 
10 | from feature_engine import selection
11 | from feature_engine import encoding
12 | 
13 | # %%
14 | 
15 | ## SAMPLE
16 | df = pd.read_excel("../../data/abt_churn.xlsx")
17 | 
18 | target = 'flChurn'
19 | features = df.columns.tolist()[4:]
20 | 
21 | # %%
22 | ### DATA PARTITION
23 | df_oot = df[df['dtRef'] == df['dtRef'].max()]
24 | 
25 | df_train = df[df['dtRef'] < df['dtRef'].max()]
26 | 
27 | # %%
28 | ### SAMPLING
29 | 
30 | X_train, X_test, y_train, y_test = model_selection.train_test_split(
31 |     df_train[features], df_train[target],
32 |     train_size=0.8,
33 |     random_state=42,
34 |     stratify=df_train[target]
35 | )
36 | 
37 | print("Response rate Train:", y_train.mean())
38 | print("Response rate Test:", y_test.mean())
39 | 
40 | # %%
41 | ## EXPLORE
42 | describe = X_train.describe()
43 | na_values = X_train.isna().sum().sort_values()
44 | 
45 | df_eda = X_train.copy()  # copy so the target column below does not leak into X_train
46 | df_eda[target] = y_train
47 | df_eda.groupby(["flChurn"]).describe().T.head(50)
48 | 
49 | # %%
50 | ## MODIFY
51 | 
52 | cat_features = X_train.dtypes[X_train.dtypes == 'object'].index.tolist()
53 | X_train[cat_features]
54 | 
55 | to_drop = ['pointsPorDia', 'avgChatLive']
56 | 
57 | drop = selection.DropFeatures(features_to_drop=to_drop)
58 | onehot = encoding.OneHotEncoder(variables=['productMaxQtde'])
59 | 
60 | # %%
61 | # MODEL
62 | 
63 | model = ensemble.RandomForestClassifier(random_state=42)
64 | 
65 | params = {
66 |     "max_depth": [4,5,8,10,15],
67 |     "min_samples_leaf": [10,15,20,50,100],
68 |     "n_estimators": [100,200,500]
69 | }
70 | 
71 | grid = model_selection.GridSearchCV(model,
72 |                                     param_grid=params,
73 |                                     scoring='roc_auc',
74 |                                     cv=3,
75 |                                     n_jobs=10)
76 | 
77 | 
78 | 
79 | model_pipe = pipeline.Pipeline([
80 |     ('drop', drop),
81 |     ('onehot', onehot),
82 |     ('model', grid)
83 | ])
84 | 
85 | model_pipe.fit(X_train[features], y_train)
86 | 
87 | # %%
88 | ## ASSESS
89 | 
90 | train_pred = model_pipe.predict_proba(X_train[features])
91 | test_pred = model_pipe.predict_proba(X_test[features])
92 | oot_pred = model_pipe.predict_proba(df_oot[features])
93 | 
94 | auc_train = metrics.roc_auc_score(y_train, train_pred[:,1])
95 | auc_test = metrics.roc_auc_score(y_test, test_pred[:,1])
96 | auc_oot = metrics.roc_auc_score(df_oot[target], oot_pred[:,1])
97 | 
98 | print("AUC Score train:", auc_train)
99 | print("AUC Score test:", auc_test)
100 | print("AUC Score oot:", auc_oot)
101 | 
102 | metrics_values = {
103 |     "train": auc_train,
104 |     "test": auc_test,
105 |     "oot": auc_oot,
106 | }
107 | 
108 | model_export = pd.Series(
109 |     {
110 |         "model": model_pipe,
111 |         "features": features,
112 |         "metrics": metrics_values,
113 |     }
114 | )
115 | 
116 | model_export.to_pickle("../../models/rf_2024_06_19.pkl")
--------------------------------------------------------------------------------
/src/train/train.py:
--------------------------------------------------------------------------------
1 | # %%
2 | import datetime
3 | 
4 | import pandas as pd
5 | import sqlalchemy
6 | 
7 | from sklearn import ensemble
8 | from sklearn import metrics
9 | from sklearn import model_selection
10 | from sklearn import pipeline
11 | 
12 | from feature_engine import encoding
13 | 
14 | # %%
15 | 
16 | # Database connection
17 | engine = sqlalchemy.create_engine("sqlite:///../../data/feature_store.db")
18 | 
19 | # Read the ABT query
20 | with open('abt.sql', 'r') as open_file:
21 |     query = open_file.read()
22 | 
23 | # Run the query and load the data
24 | df = pd.read_sql(query, engine)
25 | 
26 | df.head()
27 | # %%
28 | ## Split between train and out-of-time (OOT) bases
29 | 
30 | df_oot = df[df['dtRef']==df['dtRef'].max()]
31 | df_train = df[df['dtRef'] < df['dtRef'].max()]

[... train.py lines 32-98 were lost when this dump was generated; the
train/test split, feature definitions, model pipeline and the header of
report_metrics() presumably live in the missing span ...]

99 |     y_pred = (y_proba[:,1] > cohort).astype(int)
100 | 
101 |     acc = metrics.accuracy_score(y_true, y_pred)
102 |     auc = metrics.roc_auc_score(y_true, y_proba[:,1])
103 |     precision = metrics.precision_score(y_true, y_pred)
104 |     recall = metrics.recall_score(y_true, y_pred)
105 | 
106 |     res = {
107 |         'Accuracy': acc,
108 |         'ROC AUC': auc,
109 |         'Precision': precision,
110 |         'Recall': recall,
111 |     }
112 | 
113 |     return res
114 | 
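# Note: accuracy, precision and recall above all depend on the probability
# cutoff `cohort`, whose default sits in the elided function header
# (presumably 0.5); ROC AUC does not. An explicit call would look like:
#   report_train = report_metrics(y_train, y_train_proba, cohort=0.5)
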
115 | report_train = report_metrics(y_train, y_train_proba)
116 | report_train['base'] = 'Train'
117 | 
118 | report_test = report_metrics(y_test, y_test_proba)
119 | report_test['base'] = 'Test'
120 | 
121 | report_oot = report_metrics(df_oot[target], y_oot_proba)
122 | report_oot['base'] = 'Oot'
123 | 
124 | df_metrics = pd.DataFrame([report_train, report_test, report_oot])
125 | print(df_metrics)
126 | 
127 | # %%
128 | 
129 | model_series = pd.Series({
130 |     "model": model_pipeline,
131 |     "features": features,
132 |     "metrics": df_metrics,
133 |     "dt_train": datetime.datetime.now()
134 | })
135 | 
136 | model_series.to_pickle("../../models/rf_teo_fim_curso.pkl")
137 | 
--------------------------------------------------------------------------------
/src/train/train_mlflow.py:
--------------------------------------------------------------------------------
1 | # %%
2 | import datetime
3 | 
4 | import pandas as pd
5 | import sqlalchemy
6 | 
7 | import mlflow
8 | 
9 | from sklearn import ensemble
10 | from sklearn import metrics
11 | from sklearn import model_selection
12 | from sklearn import pipeline
13 | 
14 | from feature_engine import encoding
15 | 
16 | # %%
17 | 
18 | # Database connection
19 | engine = sqlalchemy.create_engine("sqlite:///../../data/feature_store.db")
20 | 
21 | # Read the ABT query
22 | with open('abt.sql', 'r') as open_file:
23 |     query = open_file.read()
24 | 
25 | # Run the query and load the data
26 | df = pd.read_sql(query, engine)
27 | 
28 | df.head()
29 | # %%
30 | ## Split between train and out-of-time (OOT) bases
31 | 
32 | df_oot = df[df['dtRef']==df['dtRef'].max()]
33 | df_train = df[df['dtRef'] < df['dtRef'].max()]

[... train_mlflow.py lines 34-73 were lost when this dump was generated; the
train/test split, target/cat_features definitions, the MLflow tracking setup
and the header of report_metrics(y_true, y_proba, base, ...) presumably live
in the missing span ...]

74 |     y_pred = (y_proba[:,1] > cohort).astype(int)
75 | 
76 |     acc = metrics.accuracy_score(y_true, y_pred)
77 |     auc = metrics.roc_auc_score(y_true, y_proba[:,1])
78 |     precision = metrics.precision_score(y_true, y_pred)
79 |     recall = metrics.recall_score(y_true, y_pred)
80 | 
81 |     res = {
82 |         f'{base} Accuracy': acc,
83 |         f'{base} ROC AUC': auc,
84 |         f'{base} Precision': precision,
85 |         f'{base} Recall': recall,
86 |     }
87 | 
88 |     return res
89 | 
90 | with mlflow.start_run():
91 | 
92 |     onehot = encoding.OneHotEncoder(variables=cat_features,
93 |                                     drop_last=True)
94 | 
95 |     model = ensemble.GradientBoostingClassifier(random_state=42)
96 | 
97 |     params = {"learning_rate": [0.01,0.1,0.2,0.5,0.75,0.9,0.99],
98 |               "n_estimators": [50,100,200,500],
99 |               "subsample": [0.1,0.5,0.9],
100 |               "min_samples_leaf": [5,10,25,50,100]
101 |     }
102 | 
103 |     grid = model_selection.GridSearchCV(model,
104 |                                         param_grid=params,
105 |                                         cv=3,
106 |                                         scoring='roc_auc',
107 |                                         n_jobs=-2,
108 |                                         verbose=3)
109 | 
110 |     model_pipeline = pipeline.Pipeline([
111 |         ('One Hot Encode', onehot),
112 |         ('Model', grid)
113 |     ])
114 | 
115 |     # Fit the model
116 |     model_pipeline.fit(X_train, y_train)
117 | 
118 |     y_train_proba = model_pipeline.predict_proba(X_train)
119 |     y_test_proba = model_pipeline.predict_proba(X_test)
120 |     y_oot_proba = model_pipeline.predict_proba(df_oot[features])
121 | 
122 |     report = {}
123 |     report.update(report_metrics(y_train, y_train_proba, 'train'))
124 |     report.update(report_metrics(y_test, y_test_proba, 'test'))
125 |     report.update(report_metrics(df_oot[target], y_oot_proba, 'oot'))
126 | 
127 |     mlflow.log_metrics(report)
128 | 
--------------------------------------------------------------------------------
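predict.py loads "models:/Churn-Teo-Me-Why/production", so the elided middle of train_mlflow.py presumably also logs and registers the fitted pipeline; a minimal sketch of that step, with the registry name assumed from predict.py:

import mlflow.sklearn

# inside the mlflow.start_run() block, after model_pipeline.fit(...):
mlflow.sklearn.log_model(
    model_pipeline,
    "model",
    registered_model_name="Churn-Teo-Me-Why",  # name assumed from predict.py
)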