├── .gitignore
├── LICENSE
├── README.md
├── data
│   ├── .gitkeep
│   └── query.sql
├── models
│   └── .gitkeep
├── requirements.txt
└── src
    ├── .gitkeep
    ├── app
    │   ├── api.go
    │   ├── db
    │   │   └── db.go
    │   ├── go.mod
    │   └── go.sum
    ├── feature_store
    │   ├── exec.sh
    │   ├── execute.py
    │   ├── fs_general.sql
    │   ├── fs_horario.sql
    │   ├── fs_points.sql
    │   ├── fs_produtos.sql
    │   └── fs_transacoes.sql
    ├── pipeline.sh
    ├── predict
    │   ├── etl.sql
    │   ├── predict.py
    │   └── profile_user.py
    ├── rfv
    │   ├── analise_freq_valor.py
    │   └── analise_recencia.py
    └── train
        ├── abt.sql
        ├── export_abt.py
        ├── semma_ex.py
        ├── train.py
        └── train_mlflow.py
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | *.db
3 | *.csv
4 | *.xlsx
5 | *.pkl
6 | dsenv/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Attribution-NonCommercial-ShareAlike 4.0 International
2 |
3 | =======================================================================
4 |
5 | Creative Commons Corporation ("Creative Commons") is not a law firm and
6 | does not provide legal services or legal advice. Distribution of
7 | Creative Commons public licenses does not create a lawyer-client or
8 | other relationship. Creative Commons makes its licenses and related
9 | information available on an "as-is" basis. Creative Commons gives no
10 | warranties regarding its licenses, any material licensed under their
11 | terms and conditions, or any related information. Creative Commons
12 | disclaims all liability for damages resulting from their use to the
13 | fullest extent possible.
14 |
15 | Using Creative Commons Public Licenses
16 |
17 | Creative Commons public licenses provide a standard set of terms and
18 | conditions that creators and other rights holders may use to share
19 | original works of authorship and other material subject to copyright
20 | and certain other rights specified in the public license below. The
21 | following considerations are for informational purposes only, are not
22 | exhaustive, and do not form part of our licenses.
23 |
24 | Considerations for licensors: Our public licenses are
25 | intended for use by those authorized to give the public
26 | permission to use material in ways otherwise restricted by
27 | copyright and certain other rights. Our licenses are
28 | irrevocable. Licensors should read and understand the terms
29 | and conditions of the license they choose before applying it.
30 | Licensors should also secure all rights necessary before
31 | applying our licenses so that the public can reuse the
32 | material as expected. Licensors should clearly mark any
33 | material not subject to the license. This includes other CC-
34 | licensed material, or material used under an exception or
35 | limitation to copyright. More considerations for licensors:
36 | wiki.creativecommons.org/Considerations_for_licensors
37 |
38 | Considerations for the public: By using one of our public
39 | licenses, a licensor grants the public permission to use the
40 | licensed material under specified terms and conditions. If
41 | the licensor's permission is not necessary for any reason--for
42 | example, because of any applicable exception or limitation to
43 | copyright--then that use is not regulated by the license. Our
44 | licenses grant only permissions under copyright and certain
45 | other rights that a licensor has authority to grant. Use of
46 | the licensed material may still be restricted for other
47 | reasons, including because others have copyright or other
48 | rights in the material. A licensor may make special requests,
49 | such as asking that all changes be marked or described.
50 | Although not required by our licenses, you are encouraged to
51 | respect those requests where reasonable. More considerations
52 | for the public:
53 | wiki.creativecommons.org/Considerations_for_licensees
54 |
55 | =======================================================================
56 |
57 | Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International
58 | Public License
59 |
60 | By exercising the Licensed Rights (defined below), You accept and agree
61 | to be bound by the terms and conditions of this Creative Commons
62 | Attribution-NonCommercial-ShareAlike 4.0 International Public License
63 | ("Public License"). To the extent this Public License may be
64 | interpreted as a contract, You are granted the Licensed Rights in
65 | consideration of Your acceptance of these terms and conditions, and the
66 | Licensor grants You such rights in consideration of benefits the
67 | Licensor receives from making the Licensed Material available under
68 | these terms and conditions.
69 |
70 |
71 | Section 1 -- Definitions.
72 |
73 | a. Adapted Material means material subject to Copyright and Similar
74 | Rights that is derived from or based upon the Licensed Material
75 | and in which the Licensed Material is translated, altered,
76 | arranged, transformed, or otherwise modified in a manner requiring
77 | permission under the Copyright and Similar Rights held by the
78 | Licensor. For purposes of this Public License, where the Licensed
79 | Material is a musical work, performance, or sound recording,
80 | Adapted Material is always produced where the Licensed Material is
81 | synched in timed relation with a moving image.
82 |
83 | b. Adapter's License means the license You apply to Your Copyright
84 | and Similar Rights in Your contributions to Adapted Material in
85 | accordance with the terms and conditions of this Public License.
86 |
87 | c. BY-NC-SA Compatible License means a license listed at
88 | creativecommons.org/compatiblelicenses, approved by Creative
89 | Commons as essentially the equivalent of this Public License.
90 |
91 | d. Copyright and Similar Rights means copyright and/or similar rights
92 | closely related to copyright including, without limitation,
93 | performance, broadcast, sound recording, and Sui Generis Database
94 | Rights, without regard to how the rights are labeled or
95 | categorized. For purposes of this Public License, the rights
96 | specified in Section 2(b)(1)-(2) are not Copyright and Similar
97 | Rights.
98 |
99 | e. Effective Technological Measures means those measures that, in the
100 | absence of proper authority, may not be circumvented under laws
101 | fulfilling obligations under Article 11 of the WIPO Copyright
102 | Treaty adopted on December 20, 1996, and/or similar international
103 | agreements.
104 |
105 | f. Exceptions and Limitations means fair use, fair dealing, and/or
106 | any other exception or limitation to Copyright and Similar Rights
107 | that applies to Your use of the Licensed Material.
108 |
109 | g. License Elements means the license attributes listed in the name
110 | of a Creative Commons Public License. The License Elements of this
111 | Public License are Attribution, NonCommercial, and ShareAlike.
112 |
113 | h. Licensed Material means the artistic or literary work, database,
114 | or other material to which the Licensor applied this Public
115 | License.
116 |
117 | i. Licensed Rights means the rights granted to You subject to the
118 | terms and conditions of this Public License, which are limited to
119 | all Copyright and Similar Rights that apply to Your use of the
120 | Licensed Material and that the Licensor has authority to license.
121 |
122 | j. Licensor means the individual(s) or entity(ies) granting rights
123 | under this Public License.
124 |
125 | k. NonCommercial means not primarily intended for or directed towards
126 | commercial advantage or monetary compensation. For purposes of
127 | this Public License, the exchange of the Licensed Material for
128 | other material subject to Copyright and Similar Rights by digital
129 | file-sharing or similar means is NonCommercial provided there is
130 | no payment of monetary compensation in connection with the
131 | exchange.
132 |
133 | l. Share means to provide material to the public by any means or
134 | process that requires permission under the Licensed Rights, such
135 | as reproduction, public display, public performance, distribution,
136 | dissemination, communication, or importation, and to make material
137 | available to the public including in ways that members of the
138 | public may access the material from a place and at a time
139 | individually chosen by them.
140 |
141 | m. Sui Generis Database Rights means rights other than copyright
142 | resulting from Directive 96/9/EC of the European Parliament and of
143 | the Council of 11 March 1996 on the legal protection of databases,
144 | as amended and/or succeeded, as well as other essentially
145 | equivalent rights anywhere in the world.
146 |
147 | n. You means the individual or entity exercising the Licensed Rights
148 | under this Public License. Your has a corresponding meaning.
149 |
150 |
151 | Section 2 -- Scope.
152 |
153 | a. License grant.
154 |
155 | 1. Subject to the terms and conditions of this Public License,
156 | the Licensor hereby grants You a worldwide, royalty-free,
157 | non-sublicensable, non-exclusive, irrevocable license to
158 | exercise the Licensed Rights in the Licensed Material to:
159 |
160 | a. reproduce and Share the Licensed Material, in whole or
161 | in part, for NonCommercial purposes only; and
162 |
163 | b. produce, reproduce, and Share Adapted Material for
164 | NonCommercial purposes only.
165 |
166 | 2. Exceptions and Limitations. For the avoidance of doubt, where
167 | Exceptions and Limitations apply to Your use, this Public
168 | License does not apply, and You do not need to comply with
169 | its terms and conditions.
170 |
171 | 3. Term. The term of this Public License is specified in Section
172 | 6(a).
173 |
174 | 4. Media and formats; technical modifications allowed. The
175 | Licensor authorizes You to exercise the Licensed Rights in
176 | all media and formats whether now known or hereafter created,
177 | and to make technical modifications necessary to do so. The
178 | Licensor waives and/or agrees not to assert any right or
179 | authority to forbid You from making technical modifications
180 | necessary to exercise the Licensed Rights, including
181 | technical modifications necessary to circumvent Effective
182 | Technological Measures. For purposes of this Public License,
183 | simply making modifications authorized by this Section 2(a)
184 | (4) never produces Adapted Material.
185 |
186 | 5. Downstream recipients.
187 |
188 | a. Offer from the Licensor -- Licensed Material. Every
189 | recipient of the Licensed Material automatically
190 | receives an offer from the Licensor to exercise the
191 | Licensed Rights under the terms and conditions of this
192 | Public License.
193 |
194 | b. Additional offer from the Licensor -- Adapted Material.
195 | Every recipient of Adapted Material from You
196 | automatically receives an offer from the Licensor to
197 | exercise the Licensed Rights in the Adapted Material
198 | under the conditions of the Adapter's License You apply.
199 |
200 | c. No downstream restrictions. You may not offer or impose
201 | any additional or different terms or conditions on, or
202 | apply any Effective Technological Measures to, the
203 | Licensed Material if doing so restricts exercise of the
204 | Licensed Rights by any recipient of the Licensed
205 | Material.
206 |
207 | 6. No endorsement. Nothing in this Public License constitutes or
208 | may be construed as permission to assert or imply that You
209 | are, or that Your use of the Licensed Material is, connected
210 | with, or sponsored, endorsed, or granted official status by,
211 | the Licensor or others designated to receive attribution as
212 | provided in Section 3(a)(1)(A)(i).
213 |
214 | b. Other rights.
215 |
216 | 1. Moral rights, such as the right of integrity, are not
217 | licensed under this Public License, nor are publicity,
218 | privacy, and/or other similar personality rights; however, to
219 | the extent possible, the Licensor waives and/or agrees not to
220 | assert any such rights held by the Licensor to the limited
221 | extent necessary to allow You to exercise the Licensed
222 | Rights, but not otherwise.
223 |
224 | 2. Patent and trademark rights are not licensed under this
225 | Public License.
226 |
227 | 3. To the extent possible, the Licensor waives any right to
228 | collect royalties from You for the exercise of the Licensed
229 | Rights, whether directly or through a collecting society
230 | under any voluntary or waivable statutory or compulsory
231 | licensing scheme. In all other cases the Licensor expressly
232 | reserves any right to collect such royalties, including when
233 | the Licensed Material is used other than for NonCommercial
234 | purposes.
235 |
236 |
237 | Section 3 -- License Conditions.
238 |
239 | Your exercise of the Licensed Rights is expressly made subject to the
240 | following conditions.
241 |
242 | a. Attribution.
243 |
244 | 1. If You Share the Licensed Material (including in modified
245 | form), You must:
246 |
247 | a. retain the following if it is supplied by the Licensor
248 | with the Licensed Material:
249 |
250 | i. identification of the creator(s) of the Licensed
251 | Material and any others designated to receive
252 | attribution, in any reasonable manner requested by
253 | the Licensor (including by pseudonym if
254 | designated);
255 |
256 | ii. a copyright notice;
257 |
258 | iii. a notice that refers to this Public License;
259 |
260 | iv. a notice that refers to the disclaimer of
261 | warranties;
262 |
263 | v. a URI or hyperlink to the Licensed Material to the
264 | extent reasonably practicable;
265 |
266 | b. indicate if You modified the Licensed Material and
267 | retain an indication of any previous modifications; and
268 |
269 | c. indicate the Licensed Material is licensed under this
270 | Public License, and include the text of, or the URI or
271 | hyperlink to, this Public License.
272 |
273 | 2. You may satisfy the conditions in Section 3(a)(1) in any
274 | reasonable manner based on the medium, means, and context in
275 | which You Share the Licensed Material. For example, it may be
276 | reasonable to satisfy the conditions by providing a URI or
277 | hyperlink to a resource that includes the required
278 | information.
279 | 3. If requested by the Licensor, You must remove any of the
280 | information required by Section 3(a)(1)(A) to the extent
281 | reasonably practicable.
282 |
283 | b. ShareAlike.
284 |
285 | In addition to the conditions in Section 3(a), if You Share
286 | Adapted Material You produce, the following conditions also apply.
287 |
288 | 1. The Adapter's License You apply must be a Creative Commons
289 | license with the same License Elements, this version or
290 | later, or a BY-NC-SA Compatible License.
291 |
292 | 2. You must include the text of, or the URI or hyperlink to, the
293 | Adapter's License You apply. You may satisfy this condition
294 | in any reasonable manner based on the medium, means, and
295 | context in which You Share Adapted Material.
296 |
297 | 3. You may not offer or impose any additional or different terms
298 | or conditions on, or apply any Effective Technological
299 | Measures to, Adapted Material that restrict exercise of the
300 | rights granted under the Adapter's License You apply.
301 |
302 |
303 | Section 4 -- Sui Generis Database Rights.
304 |
305 | Where the Licensed Rights include Sui Generis Database Rights that
306 | apply to Your use of the Licensed Material:
307 |
308 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right
309 | to extract, reuse, reproduce, and Share all or a substantial
310 | portion of the contents of the database for NonCommercial purposes
311 | only;
312 |
313 | b. if You include all or a substantial portion of the database
314 | contents in a database in which You have Sui Generis Database
315 | Rights, then the database in which You have Sui Generis Database
316 | Rights (but not its individual contents) is Adapted Material,
317 | including for purposes of Section 3(b); and
318 |
319 | c. You must comply with the conditions in Section 3(a) if You Share
320 | all or a substantial portion of the contents of the database.
321 |
322 | For the avoidance of doubt, this Section 4 supplements and does not
323 | replace Your obligations under this Public License where the Licensed
324 | Rights include other Copyright and Similar Rights.
325 |
326 |
327 | Section 5 -- Disclaimer of Warranties and Limitation of Liability.
328 |
329 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
330 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
331 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
332 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
333 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
334 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
335 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
336 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
337 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
338 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
339 |
340 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
341 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
342 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
343 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
344 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
345 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
346 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
347 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
348 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
349 |
350 | c. The disclaimer of warranties and limitation of liability provided
351 | above shall be interpreted in a manner that, to the extent
352 | possible, most closely approximates an absolute disclaimer and
353 | waiver of all liability.
354 |
355 |
356 | Section 6 -- Term and Termination.
357 |
358 | a. This Public License applies for the term of the Copyright and
359 | Similar Rights licensed here. However, if You fail to comply with
360 | this Public License, then Your rights under this Public License
361 | terminate automatically.
362 |
363 | b. Where Your right to use the Licensed Material has terminated under
364 | Section 6(a), it reinstates:
365 |
366 | 1. automatically as of the date the violation is cured, provided
367 | it is cured within 30 days of Your discovery of the
368 | violation; or
369 |
370 | 2. upon express reinstatement by the Licensor.
371 |
372 | For the avoidance of doubt, this Section 6(b) does not affect any
373 | right the Licensor may have to seek remedies for Your violations
374 | of this Public License.
375 |
376 | c. For the avoidance of doubt, the Licensor may also offer the
377 | Licensed Material under separate terms or conditions or stop
378 | distributing the Licensed Material at any time; however, doing so
379 | will not terminate this Public License.
380 |
381 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
382 | License.
383 |
384 |
385 | Section 7 -- Other Terms and Conditions.
386 |
387 | a. The Licensor shall not be bound by any additional or different
388 | terms or conditions communicated by You unless expressly agreed.
389 |
390 | b. Any arrangements, understandings, or agreements regarding the
391 | Licensed Material not stated herein are separate from and
392 | independent of the terms and conditions of this Public License.
393 |
394 |
395 | Section 8 -- Interpretation.
396 |
397 | a. For the avoidance of doubt, this Public License does not, and
398 | shall not be interpreted to, reduce, limit, restrict, or impose
399 | conditions on any use of the Licensed Material that could lawfully
400 | be made without permission under this Public License.
401 |
402 | b. To the extent possible, if any provision of this Public License is
403 | deemed unenforceable, it shall be automatically reformed to the
404 | minimum extent necessary to make it enforceable. If the provision
405 | cannot be reformed, it shall be severed from this Public License
406 | without affecting the enforceability of the remaining terms and
407 | conditions.
408 |
409 | c. No term or condition of this Public License will be waived and no
410 | failure to comply consented to unless expressly agreed to by the
411 | Licensor.
412 |
413 | d. Nothing in this Public License constitutes or may be interpreted
414 | as a limitation upon, or waiver of, any privileges and immunities
415 | that apply to the Licensor or You, including from the legal
416 | processes of any jurisdiction or authority.
417 |
418 | =======================================================================
419 |
420 | Creative Commons is not a party to its public
421 | licenses. Notwithstanding, Creative Commons may elect to apply one of
422 | its public licenses to material it publishes and in those instances
423 | will be considered the “Licensor.” The text of the Creative Commons
424 | public licenses is dedicated to the public domain under the CC0 Public
425 | Domain Dedication. Except for the limited purpose of indicating that
426 | material is shared under a Creative Commons public license or as
427 | otherwise permitted by the Creative Commons policies published at
428 | creativecommons.org/policies, Creative Commons does not authorize the
429 | use of the trademark "Creative Commons" or any other trademark or logo
430 | of Creative Commons without its prior written consent including,
431 | without limitation, in connection with any unauthorized modifications
432 | to any of its public licenses or any other arrangements,
433 | understandings, or agreements concerning use of licensed material. For
434 | the avoidance of doubt, this paragraph does not form part of the
435 | public licenses.
436 |
437 | Creative Commons may be contacted at creativecommons.org.
438 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Data Science & Points
2 |
3 | [![CC BY-NC-SA 4.0][cc-by-nc-sa-shield]][cc-by-nc-sa]
4 |
5 |
6 |
7 | An end-to-end applied Data Science project: a complete pipeline for a data solution.
8 |
9 | - [About](#about)
10 | - [Context](#context)
11 | - [Steps](#steps)
12 | - [Prerequisites](#prerequisites)
13 | - [Challenge](#challenge)
14 | - [About the author](#about-the-author)
15 | - [How to support](#support-this-initiative)
16 |
17 | This material is licensed under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License][cc-by-nc-sa].
18 |
19 | [![CC BY-NC-SA 4.0][cc-by-nc-sa-image]][cc-by-nc-sa]
20 |
21 | [cc-by-nc-sa]: http://creativecommons.org/licenses/by-nc-sa/4.0/
22 | [cc-by-nc-sa-image]: https://licensebuttons.net/l/by-nc-sa/4.0/88x31.png
23 | [cc-by-nc-sa-shield]: https://img.shields.io/badge/License-CC%20BY--NC--SA%204.0-lightgrey.svg
24 |
25 | ## About
26 | We built a Data Science solution, applying Machine Learning techniques to a specific business problem.
27 |
28 | Everything was developed live on the [Téo Me Why](https://teomewhy.org) channel and made available to our Twitch Subs and YouTube Members.
29 |
30 | Subscribe here: [Twitch](https://www.twitch.tv/collections/jg9itHOO1ReLcw) / [YouTube](https://www.youtube.com/playlist?list=PLvlkVRRKOYFQOkwDvfgCvKi9-I1jQXiy7)
31 |
32 | ### Context
33 | We have data on the users of the channel's points system. Based on it, we want to identify actions and data products that increase user engagement.
34 |
35 | So we set out to build a Data Science project covering every step needed to build a data product.
36 |
37 | ### Steps
38 | - Feature Store construction;
39 | - Cohort (safra) processing;
40 | - Target variable construction;
41 | - ABT (*Analytical Base Table*) construction;
42 | - Predictive model training;
43 | - Deployment;
44 |
45 | ### Prerequisites
46 |
47 | #### Courses
48 |
49 | For a better experience with this project, it is worth checking out the following completely free playlists:
50 |
51 | - [Git/GitHub](https://www.youtube.com/playlist?list=PLvlkVRRKOYFQ3cfYPjLeQ0KvrQ8bG5H11)
52 | - [Python](https://www.youtube.com/playlist?list=PLvlkVRRKOYFRXdquucikNbwYeFzzzYIGb)
53 | - [Pandas](https://www.youtube.com/playlist?list=PLvlkVRRKOYFSl-XCxNQ1u3uOLvDnYxupG)
54 | - [Statistics](https://www.youtube.com/playlist?list=PLvlkVRRKOYFSWIyhwq4Nu8sNd_GfOi1tj)
55 | - [Machine Learning](https://www.youtube.com/playlist?list=PLvlkVRRKOYFTXcpttQSZmv1wDg7F3uH7o)
56 |
57 | #### Materials
58 |
59 | - :arrow_lower_right: [Download the data here!](https://drive.google.com/drive/folders/1JLzofrtaVQdo0PdUysNWjNsBdAaI21EJ?usp=sharing) :arrow_lower_left:
60 | - :arrow_lower_right: [Access the presentation here!](https://docs.google.com/presentation/d/1zMTsaAeoMX9ico13PVd7_tOffE8kUH-IOA5kCjSYIx8/edit?usp=sharing) :arrow_lower_left:
61 |
62 | #### Software
63 | - [Python/Anaconda](https://www.anaconda.com/download)
64 | - [VSCode](https://code.visualstudio.com/download)
65 | - [Python extension](https://marketplace.visualstudio.com/items?itemName=ms-python.python)
66 | - [Jupyter extension](https://marketplace.visualstudio.com/items?itemName=ms-toolsai.jupyter)
67 | - [SQLite extension](https://marketplace.visualstudio.com/items?itemName=alexcvzz.vscode-sqlite)
68 | - [SQLTools SQLite extension](https://marketplace.visualstudio.com/items?itemName=mtxr.sqltools-driver-sqlite)
69 |
70 | #### Setup
71 |
72 | With the required tools installed, we can create our *environment* with Anaconda (conda):
73 |
74 | ```bash
75 | conda create --name ds_points python=3
76 | conda activate ds_points
77 |
78 | pip install -r requirements.txt
79 | ```
80 |
81 | ## Challenge
82 |
83 | During the course we trained a Random Forest model with grid search. From this model, we obtained the following metrics:
84 |
85 | | Dataset | Accuracy | ROC AUC | Precision | Recall |
86 | | :---: | :---: | :---: | :---: | :---: |
87 | | **Train** | 0.819401 | 0.913987 | 0.770598 | 0.845745 |
88 | | **Test** | 0.747634 | 0.817416 | 0.684848 | 0.801418 |
89 | | **Oot** | 0.741602 | 0.814528 | 0.669291 | 0.594406 |
90 |
91 | Use the data from [this link](https://docs.google.com/spreadsheets/d/1zcP7CKDcqEkhK2b_g27yGY226ZaX_kX4UxBsNQfM9RQ/edit?usp=sharing) to try to improve the model's performance on the out-of-time (OOT) set.
92 |
93 | Consider:
94 |
95 | ```python
96 |
97 | target = 'flChurn'
98 |
99 | # OOT dataframe
100 | df_oot = df[df['dtRef'] == df['dtRef'].max()]
101 |
102 | # Training dataframe
103 | df_train = df[df['dtRef'] < df['dtRef'].max()]
104 |
105 | features = df_train.columns[3:].tolist()
106 | ```
107 |
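108 | A minimal baseline sketch for evaluating a candidate on the OOT window (assuming `df` has been loaded from the spreadsheet above and scikit-learn from `requirements.txt` is installed; the hyperparameter grid below is illustrative, not the one used in the course):
109 |
110 | ```python
111 | from sklearn.ensemble import RandomForestClassifier
112 | from sklearn.metrics import roc_auc_score
113 | from sklearn.model_selection import GridSearchCV
114 | # Fit on the training window only; the OOT rows stay untouched.
115 | grid = GridSearchCV(
116 |     RandomForestClassifier(random_state=42),
117 |     param_grid={'n_estimators': [100, 500], 'min_samples_leaf': [10, 25, 50]},
118 |     scoring='roc_auc',
119 |     cv=3,
120 | )
121 | grid.fit(df_train[features], df_train[target])
122 |
123 | # Score the out-of-time window to compare against the table above.
124 | proba_oot = grid.predict_proba(df_oot[features])[:, 1]
125 | print('ROC AUC (oot):', roc_auc_score(df_oot[target], proba_oot))
126 | ```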
127 | ## Support this initiative!
128 |
129 | We do free education work in the data field, so every bit of support matters. Check out the different ways to support us:
130 |
131 | - 💵 Pix key: pix@teomewhy.org
132 | - 💶 LivePix: [livepix.gg/teomewhy](https://livepix.gg/teomewhy)
133 | - 💷 GitHub Sponsors: [github.com/sponsors/TeoMeWhy](https://github.com/sponsors/TeoMeWhy)
134 | - 💴 ApoiaSe: [apoia.se/teomewhy](https://apoia.se/teomewhy)
135 | - 🎥 YouTube member: [youtube.com/@teomewhy/membership](https://www.youtube.com/@teomewhy/membership)
136 | - 🎮 Twitch sub: [twitch.tv/teomewhy](https://www.twitch.tv/teomewhy)
137 | - 💌 Newsletter: [teomewhy.substack.com](https://teomewhy.substack.com/)
138 |
--------------------------------------------------------------------------------
/data/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeoMeWhy/ds-points/055bcc014bf896757ee412951296dc3e1bd37aba/data/.gitkeep
--------------------------------------------------------------------------------
/data/query.sql:
--------------------------------------------------------------------------------
1 | SELECT *
2 | FROM customer_profile
3 |
4 | GROUP BY 1, 2
--------------------------------------------------------------------------------
/models/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeoMeWhy/ds-points/055bcc014bf896757ee412951296dc3e1bd37aba/models/.gitkeep
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | feature_engine==1.8.0
2 | pandas==2.2.2
3 | scikit_learn==1.5.0
4 | SQLAlchemy==2.0.30
5 | tqdm==4.66.4
6 | mlflow==2.13.2
7 | openpyxl==3.1.4
--------------------------------------------------------------------------------
/src/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeoMeWhy/ds-points/055bcc014bf896757ee412951296dc3e1bd37aba/src/.gitkeep
--------------------------------------------------------------------------------
/src/app/api.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "app/db"
5 | "net/http"
6 |
7 | "github.com/gin-gonic/gin"
8 | )
9 |
10 | var con, _ = db.Connect() // sql.Open only validates its arguments; any real error surfaces on the first query
11 |
12 | func getUserProfile(c *gin.Context) {
13 | id := c.Param("id")
14 |
15 | profile, err := db.GetUser(id, con)
16 | if err != nil {
17 | 		c.JSON(http.StatusInternalServerError, gin.H{"error": "internal error while fetching the user"})
18 | return
19 | }
20 |
21 | if profile["idCustomer"] == "" {
22 | 		c.JSON(http.StatusNotFound, gin.H{"error": "user not found"})
23 | return
24 | }
25 |
26 | c.JSON(http.StatusOK, profile)
27 |
28 | }
29 |
30 | func main() {
31 |
32 | router := gin.Default()
33 |
34 | 	router.GET("/profile/:id", getUserProfile)
35 |
36 | 	router.Run("localhost:8082")
37 | }
38 |
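39 | // Example request (assumes the server is running and data/feature_store.db is populated;
40 | // <idCustomer> is a placeholder):
41 | //	curl http://localhost:8082/profile/<idCustomer>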
--------------------------------------------------------------------------------
/src/app/db/db.go:
--------------------------------------------------------------------------------
1 | package db
2 |
3 | import (
4 | "database/sql"
5 |
6 | _ "github.com/mattn/go-sqlite3"
7 | )
8 |
9 | func Connect() (*sql.DB, error) {
10 |
11 | 	con, err := sql.Open("sqlite3", "../../data/feature_store.db") // path relative to src/app, where the server runs
12 | if err != nil {
13 | return nil, err
14 | }
15 |
16 | return con, nil
17 |
18 | }
19 |
20 | func GetUser(id string, con *sql.DB) (map[string]string, error) {
21 |
22 | query := `
23 | SELECT
24 | dtRef AS dtRef,
25 | idCustomer AS idCustomer,
26 | prob_churn AS probChurn,
27 | cluster_recencia AS cicloVida,
28 | cluster_fv AS clusterRF,
29 | dtUpdate AS dtUpdate
30 |
31 | FROM customer_profile
32 |
33 | WHERE idCustomer = ?
34 | AND dtRef = (SELECT MAX(dtRef) FROM customer_profile)
35 | `
36 |
37 | 	stmt, err := con.Prepare(query)
38 | 	if err != nil {
39 | 		return nil, err
40 | 	}
41 | 	defer stmt.Close()
42 |
43 | 	rows, err := stmt.Query(id)
44 | 	if err != nil {
45 | 		return nil, err
46 | 	}
47 | 	defer rows.Close()
48 |
49 | 	var dtRef, idCustomer, probChurn, cicloVida, clusterRF, dtUpdate string
50 | 	for rows.Next() {
51 | 		if err := rows.Scan(&dtRef, &idCustomer, &probChurn, &cicloVida, &clusterRF, &dtUpdate); err != nil {
52 | 			return nil, err
53 | 		}
54 | 	}
55 | 	if err := rows.Err(); err != nil {
56 | 		return nil, err
57 | 	}
58 |
59 | 	values := map[string]string{
60 | 		"dtRef":      dtRef,
61 | 		"idCustomer": idCustomer,
62 | 		"probChurn":  probChurn,
63 | 		"cicloVida":  cicloVida,
64 | 		"clusterRF":  clusterRF,
65 | 		"dtUpdate":   dtUpdate,
66 | 	}
67 |
68 | 	return values, nil
69 | }
70 |
--------------------------------------------------------------------------------
/src/app/go.mod:
--------------------------------------------------------------------------------
1 | module app
2 |
3 | go 1.22.2
4 |
5 | require (
6 | github.com/gin-gonic/gin v1.10.0
7 | github.com/mattn/go-sqlite3 v1.14.22
8 | )
9 |
10 | require (
11 | github.com/bytedance/sonic v1.11.6 // indirect
12 | github.com/bytedance/sonic/loader v0.1.1 // indirect
13 | github.com/cloudwego/base64x v0.1.4 // indirect
14 | github.com/cloudwego/iasm v0.2.0 // indirect
15 | github.com/gabriel-vasile/mimetype v1.4.3 // indirect
16 | github.com/gin-contrib/sse v0.1.0 // indirect
17 | github.com/go-playground/locales v0.14.1 // indirect
18 | github.com/go-playground/universal-translator v0.18.1 // indirect
19 | github.com/go-playground/validator/v10 v10.20.0 // indirect
20 | github.com/goccy/go-json v0.10.2 // indirect
21 | github.com/json-iterator/go v1.1.12 // indirect
22 | github.com/klauspost/cpuid/v2 v2.2.7 // indirect
23 | github.com/leodido/go-urn v1.4.0 // indirect
24 | github.com/mattn/go-isatty v0.0.20 // indirect
25 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
26 | github.com/modern-go/reflect2 v1.0.2 // indirect
27 | github.com/pelletier/go-toml/v2 v2.2.2 // indirect
28 | github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
29 | github.com/ugorji/go/codec v1.2.12 // indirect
30 | golang.org/x/arch v0.8.0 // indirect
31 | golang.org/x/crypto v0.23.0 // indirect
32 | golang.org/x/net v0.25.0 // indirect
33 | golang.org/x/sys v0.20.0 // indirect
34 | golang.org/x/text v0.15.0 // indirect
35 | google.golang.org/protobuf v1.34.1 // indirect
36 | gopkg.in/yaml.v3 v3.0.1 // indirect
37 | )
38 |
--------------------------------------------------------------------------------
/src/app/go.sum:
--------------------------------------------------------------------------------
1 | github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0=
2 | github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4=
3 | github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
4 | github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
5 | github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
6 | github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
7 | github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg=
8 | github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
9 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
10 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
11 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
12 | github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
13 | github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
14 | github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
15 | github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
16 | github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU=
17 | github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
18 | github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
19 | github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
20 | github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
21 | github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
22 | github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
23 | github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
24 | github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBExVwjEviJTixqxL8=
25 | github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
26 | github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
27 | github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
28 | github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
29 | github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
30 | github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
31 | github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
32 | github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
33 | github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
34 | github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM=
35 | github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
36 | github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
37 | github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
38 | github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
39 | github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
40 | github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
41 | github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU=
42 | github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
43 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
44 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
45 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
46 | github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
47 | github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
48 | github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM=
49 | github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs=
50 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
51 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
52 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
53 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
54 | github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
55 | github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
56 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
57 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
58 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
59 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
60 | github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
61 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
62 | github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
63 | github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
64 | github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
65 | github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
66 | github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
67 | github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
68 | golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
69 | golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
70 | golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
71 | golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI=
72 | golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
73 | golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac=
74 | golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
75 | golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
76 | golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
77 | golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
78 | golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
79 | golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk=
80 | golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
81 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
82 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
83 | google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg=
84 | google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
85 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
86 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
87 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
88 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
89 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
90 | nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50=
91 | rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
92 |
--------------------------------------------------------------------------------
/src/feature_store/exec.sh:
--------------------------------------------------------------------------------
1 | python execute.py -f fs_general
2 | python execute.py -f fs_horario
3 | python execute.py -f fs_points
4 | python execute.py -f fs_produtos
5 | python execute.py -f fs_transacoes
6 |
--------------------------------------------------------------------------------
/src/feature_store/execute.py:
--------------------------------------------------------------------------------
1 | # %%
2 | import argparse
3 | import datetime
4 |
5 | import pandas as pd
6 | import sqlalchemy
7 | from sqlalchemy import exc
8 |
9 | from tqdm import tqdm
10 |
11 | def import_query(path):
12 | with open(path, 'r') as open_file:
13 | return open_file.read()
14 |
15 |
16 | def date_range(start, stop):
17 | dt_start = datetime.datetime.strptime(start, '%Y-%m-%d')
18 | dt_stop = datetime.datetime.strptime(stop, '%Y-%m-%d')
19 | dates = []
20 | while dt_start <= dt_stop:
21 | dates.append(dt_start.strftime("%Y-%m-%d"))
22 | dt_start += datetime.timedelta(days=1)
23 | return dates
24 |
25 |
26 | def ingest_date(query, table, dt):
27 |
28 |     # Replace '{date}' with an actual date, e.g. 2024-06-06
29 |     query_fmt = query.format(date=dt)
30 |
31 |     # Run the query and pull the result into Python
32 |     df = pd.read_sql(query_fmt, ORIGIN_ENGINE)
33 |
34 |     # Delete rows with this reference date to guarantee integrity (idempotent re-runs)
35 |     with TARGET_ENGINE.connect() as con:
36 |         try:
37 |             state = f"DELETE FROM {table} WHERE dtRef = '{dt}';"
38 |             con.execute(sqlalchemy.text(state))
39 |             con.commit()
40 |         except exc.OperationalError:
41 |             print("Table does not exist yet, creating it...")
42 |
43 |     # Send the data to the target database
44 |     df.to_sql(table, TARGET_ENGINE, index=False, if_exists='append')
45 |
46 |
47 | # %%
48 |
49 | now = datetime.datetime.now().strftime("%Y-%m-%d")
50 |
51 | parser = argparse.ArgumentParser()
52 | parser.add_argument("--feature_store", "-f", help="Feature store name", type=str)
53 | parser.add_argument("--start", "-s", help="Start date", default=now, type=str)
54 | parser.add_argument("--stop", "-p", help="End date", default=now, type=str)
55 | args = parser.parse_args()
56 |
57 | # Source (raw data) and target (feature store) SQLite databases,
58 | # with paths relative to this script's directory (src/feature_store)
59 | ORIGIN_ENGINE = sqlalchemy.create_engine("sqlite:///../../data/database.db")
60 | TARGET_ENGINE = sqlalchemy.create_engine("sqlite:///../../data/feature_store.db")
61 |
62 | # Load the query from disk
63 | query = import_query(f"{args.feature_store}.sql")
64 | dates = date_range(args.start, args.stop)
65 |
66 | for date in tqdm(dates):
67 |     ingest_date(query, args.feature_store, date)
68 |
--------------------------------------------------------------------------------
/src/feature_store/fs_general.sql:
--------------------------------------------------------------------------------
1 | WITH tb_rfv AS (
2 |
3 | SELECT
4 | idCustomer,
5 |
6 | CAST(min(julianday('{date}') - julianday(dtTransaction))
7 | AS INTEGER) + 1 AS recenciaDias,
8 |
9 | COUNT(DISTINCT DATE(dtTransaction)) AS frequenciaDias,
10 |
11 | SUM(CASE
12 | WHEN pointsTransaction > 0 THEN pointsTransaction
13 | END) AS valorPoints
14 |
15 | FROM transactions
16 |
17 | WHERE dtTransaction < '{date}'
18 | AND dtTransaction >= DATE('{date}', '-21 day')
19 |
20 | GROUP BY idCustomer
21 | ),
22 |
23 | tb_idade AS (
24 |
25 | SELECT
26 |
27 | t1.idCustomer,
28 |
29 | CAST(MAX(julianday('{date}') - julianday(t2.dtTransaction))
30 | AS INTEGER) + 1 AS idadeBaseDias
31 |
32 | FROM tb_rfv AS t1
33 |
34 | LEFT JOIN transactions AS t2
35 | ON t1.idCustomer = t2.idCustomer
36 |
37 | GROUP BY t1.idCustomer
38 |
39 | )
40 |
41 | SELECT
42 | '{date}' AS dtRef,
43 | t1.*,
44 | t2.idadeBaseDias,
45 | t3.flEmail
46 |
47 | FROM tb_rfv AS t1
48 |
49 | LEFT JOIN tb_idade AS t2
50 | ON t1.idCustomer = t2.idCustomer
51 |
52 | LEFT JOIN customers AS t3
53 | ON t1.idCustomer = t3.idCustomer
--------------------------------------------------------------------------------
/src/feature_store/fs_horario.sql:
--------------------------------------------------------------------------------
1 | WITH tb_transactions_hour AS (
2 |
3 | SELECT idCustomer,
4 | pointsTransaction,
5 | CAST(STRFTIME('%H', DATETIME(dtTransaction, '-3 hour')) AS INTEGER) AS hour
6 |
7 | FROM transactions
8 |
9 | WHERE dtTransaction < '{date}'
10 | AND dtTransaction >= DATE('{date}', '-21 day')
11 |
12 | ),
13 |
14 | tb_share AS (
15 |
16 | SELECT idCustomer,
17 | SUM(CASE WHEN hour >= 8 and hour < 12 THEN abs(pointsTransaction) ELSE 0 END) AS qtdPointsManha,
18 | SUM(CASE WHEN hour >= 12 and hour < 18 THEN abs(pointsTransaction) ELSE 0 END) AS qtdPointsTarde,
19 | SUM(CASE WHEN hour >= 18 and hour <= 23 THEN abs(pointsTransaction) ELSE 0 END) AS qtdPointsNoite,
20 |
21 | 1.0 * SUM(CASE WHEN hour >= 8 and hour < 12 THEN abs(pointsTransaction) ELSE 0 END) / SUM(abs(pointsTransaction)) AS pctPointsManha,
22 | 1.0 * SUM(CASE WHEN hour >= 12 and hour < 18 THEN abs(pointsTransaction) ELSE 0 END) / SUM(abs(pointsTransaction)) AS pctPointsTarde,
23 | 1.0 * SUM(CASE WHEN hour >= 18 and hour <= 23 THEN abs(pointsTransaction) ELSE 0 END) / SUM(abs(pointsTransaction)) AS pctPointsNoite,
24 |
25 | SUM(CASE WHEN hour >= 8 and hour < 12 THEN 1 ELSE 0 END) AS qtdTransacoesManha,
26 | SUM(CASE WHEN hour >= 12 and hour < 18 THEN 1 ELSE 0 END) AS qtdTransacoesTarde,
27 | SUM(CASE WHEN hour >= 18 and hour <= 23 THEN 1 ELSE 0 END) AS qtdTransacoesNoite,
28 |
29 | 1.0 * SUM(CASE WHEN hour >= 8 and hour < 12 THEN 1 ELSE 0 END) / SUM(1) AS pctTransacoesManha,
30 | 1.0 * SUM(CASE WHEN hour >= 12 and hour < 18 THEN 1 ELSE 0 END) / SUM(1) AS pctTransacoesTarde,
31 | 1.0 * SUM(CASE WHEN hour >= 18 and hour <= 23 THEN 1 ELSE 0 END) / SUM(1) AS pctTransacoesNoite
32 |
33 | FROM tb_transactions_hour
34 |
35 | GROUP BY idCustomer
36 |
37 | )
38 |
39 | SELECT
40 | '{date}' AS dtRef,
41 | *
42 |
43 | FROM tb_share
--------------------------------------------------------------------------------
/src/feature_store/fs_points.sql:
--------------------------------------------------------------------------------
1 | WITH tb_pontos_d AS (
2 |
3 | SELECT idCustomer,
4 |
5 | SUM(pointsTransaction) AS saldoPointsD21,
6 |
7 | SUM(CASE WHEN dtTransaction >= DATE('{date}', '-14 day')
8 | THEN pointsTransaction
9 | ELSE 0
10 | END) AS saldoPointsD14,
11 |
12 | SUM(CASE WHEN dtTransaction >= DATE('{date}', '-7 day')
13 | THEN pointsTransaction
14 | ELSE 0
15 | END) AS saldoPointsD7,
16 |
17 |
18 | SUM(CASE WHEN pointsTransaction > 0
19 | THEN pointsTransaction
20 | ELSE 0
21 | END) AS pointsAcumuladosD21,
22 |
23 | SUM(CASE WHEN pointsTransaction > 0
24 | AND dtTransaction >= DATE('{date}', '-14 day')
25 | THEN pointsTransaction
26 | ELSE 0
27 | END) AS pointsAcumuladosD14,
28 |
29 | SUM(CASE WHEN pointsTransaction > 0
30 | AND dtTransaction >= DATE('{date}', '-7 day')
31 | THEN pointsTransaction
32 | ELSE 0
33 | END) AS pointsAcumuladosD7,
34 |
35 |
36 | SUM(CASE WHEN pointsTransaction < 0
37 | THEN pointsTransaction
38 | ELSE 0
39 | END) AS pointsResgatadosD21,
40 |
41 | SUM(CASE WHEN pointsTransaction < 0
42 | AND dtTransaction >= DATE('{date}', '-14 day')
43 | THEN pointsTransaction
44 | ELSE 0
45 | END) AS pointsResgatadosD14,
46 |
47 | SUM(CASE WHEN pointsTransaction < 0
48 | AND dtTransaction >= DATE('{date}', '-7 day')
49 | THEN pointsTransaction
50 | ELSE 0
51 | END) AS pointsResgatadosD7
52 |
53 |
54 | FROM transactions
55 |
56 | WHERE dtTransaction < '{date}'
57 | AND dtTransaction >= DATE('{date}', '-21 day')
58 |
59 | GROUP BY idCustomer
60 |
61 | ),
62 |
63 | tb_vida AS (
64 |
65 | SELECT t1.idCustomer,
66 | SUM(t2.pointsTransaction) AS saldoPoints,
67 | SUM(CASE
68 | WHEN t2.pointsTransaction > 0
69 | THEN t2.pointsTransaction
70 | ELSE 0
71 | END) AS pointsAcumuladosVida,
72 | SUM(CASE
73 | WHEN t2.pointsTransaction < 0
74 | THEN t2.pointsTransaction
75 | ELSE 0
76 | END) AS pointsResgatadosVida,
77 |
78 | CAST(max(julianday('{date}') - julianday(dtTransaction)) AS INTEGER) + 1 AS diasVida
79 |
80 | FROM tb_pontos_d AS t1
81 |
82 | LEFT JOIN transactions AS t2
83 | ON t1.idCustomer = t2.idCustomer
84 |
85 | WHERE t2.dtTransaction < '{date}'
86 |
87 | GROUP BY t1.idCustomer
88 |
89 | ),
90 |
91 | tb_join AS (
92 |
93 | SELECT
94 | t1.*,
95 | t2.saldoPoints,
96 | t2.pointsAcumuladosVida,
97 | t2.pointsResgatadosVida,
98 | 1.0 * t2.pointsAcumuladosVida / t2.diasVida AS pointsPorDia
99 |
100 | FROM tb_pontos_d As t1
101 |
102 | LEFT JOIN tb_vida AS t2
103 | ON t1.idCustomer = t2.idCustomer
104 |
105 | )
106 |
107 |
108 | SELECT
109 | '{date}' AS dtRef,
110 | *
111 | FROM tb_join
--------------------------------------------------------------------------------
/src/feature_store/fs_produtos.sql:
--------------------------------------------------------------------------------
1 | WITH tb_transactions_products AS (
2 |
3 | SELECT t1.*,
4 | t2.NameProduct,
5 | t2.QuantityProduct
6 |
7 | FROM transactions AS t1
8 |
9 | LEFT JOIN transactions_product AS t2
10 | ON t1.idTransaction = t2.idTransaction
11 |
12 | WHERE t1.dtTransaction < '{date}'
13 | AND t1.dtTransaction >= DATE('{date}', '-21 day')
14 |
15 | ),
16 |
17 | tb_share AS (
18 |
19 | SELECT
20 |
21 | idCustomer,
22 |
23 | SUM(CASE WHEN NameProduct = 'ChatMessage' THEN QuantityProduct ELSE 0 END) AS qtdeChatMessage,
24 | SUM(CASE WHEN NameProduct = 'Lista de presença' THEN QuantityProduct ELSE 0 END) AS qtdeListaPresença,
25 | SUM(CASE WHEN NameProduct = 'Resgatar Ponei' THEN QuantityProduct ELSE 0 END) AS qtdeResgatarPonei,
26 | SUM(CASE WHEN NameProduct = 'Troca de Pontos StreamElements' THEN QuantityProduct ELSE 0 END) AS qtdeTrocaPontos,
27 | SUM(CASE WHEN NameProduct = 'Presença Streak' THEN QuantityProduct ELSE 0 END) AS qtdePresençaStreak,
28 | SUM(CASE WHEN NameProduct = 'Airflow Lover' THEN QuantityProduct ELSE 0 END) AS qtdeAirflowLover,
29 | SUM(CASE WHEN NameProduct = 'R Lover' THEN QuantityProduct ELSE 0 END) AS qtdeRLover,
30 |
31 | SUM(CASE WHEN NameProduct = 'ChatMessage' THEN pointsTransaction ELSE 0 END) AS pointsChatMessage,
32 | SUM(CASE WHEN NameProduct = 'Lista de presença' THEN pointsTransaction ELSE 0 END) AS pointsListaPresença,
33 | SUM(CASE WHEN NameProduct = 'Resgatar Ponei' THEN pointsTransaction ELSE 0 END) AS pointsResgatarPonei,
34 | SUM(CASE WHEN NameProduct = 'Troca de Pontos StreamElements' THEN pointsTransaction ELSE 0 END) AS pointsTrocaPontos,
35 | SUM(CASE WHEN NameProduct = 'Presença Streak' THEN pointsTransaction ELSE 0 END) AS pointsPresençaStreak,
36 | SUM(CASE WHEN NameProduct = 'Airflow Lover' THEN pointsTransaction ELSE 0 END) AS pointsAirflowLover,
37 | SUM(CASE WHEN NameProduct = 'R Lover' THEN pointsTransaction ELSE 0 END) AS pointsRLover,
38 |
39 | 1.0 * SUM(CASE WHEN NameProduct = 'ChatMessage' THEN QuantityProduct ELSE 0 END) / SUM(QuantityProduct) AS pctChatMessage,
40 | 1.0 * SUM(CASE WHEN NameProduct = 'Lista de presença' THEN QuantityProduct ELSE 0 END) / SUM(QuantityProduct) AS pctListaPresença,
41 | 1.0 * SUM(CASE WHEN NameProduct = 'Resgatar Ponei' THEN QuantityProduct ELSE 0 END) / SUM(QuantityProduct) AS pctResgatarPonei,
42 | 1.0 * SUM(CASE WHEN NameProduct = 'Troca de Pontos StreamElements' THEN QuantityProduct ELSE 0 END) / SUM(QuantityProduct) AS pctTrocaPontos,
43 | 1.0 * SUM(CASE WHEN NameProduct = 'Presença Streak' THEN QuantityProduct ELSE 0 END) / SUM(QuantityProduct) AS pctPresençaStreak,
44 | 1.0 * SUM(CASE WHEN NameProduct = 'Airflow Lover' THEN QuantityProduct ELSE 0 END) / SUM(QuantityProduct) AS pctAirflowLover,
45 | 1.0 * SUM(CASE WHEN NameProduct = 'R Lover' THEN QuantityProduct ELSE 0 END) / SUM(QuantityProduct) AS pctRLover,
46 |
47 | 1.0 * SUM(CASE WHEN NameProduct = 'ChatMessage' THEN QuantityProduct ELSE 0 END) / COUNT(DISTINCT DATE(dtTransaction)) AS avgChatLive
48 |
49 | FROM tb_transactions_products
50 |
51 | GROUP BY idCustomer
52 |
53 | ),
54 |
55 | tb_group AS (
56 |
57 | SELECT idCustomer,
58 | NameProduct,
59 | sum(QuantityProduct) AS qtde,
60 | sum(pointsTransaction) AS points
61 |
62 | FROM tb_transactions_products
63 | GROUP BY idCustomer, NameProduct
64 |
65 | ),
66 |
67 | tb_rn AS (
68 |
69 | SELECT *,
70 | ROW_NUMBER() OVER (PARTITION BY idCustomer ORDER BY qtde DESC, points DESC) AS rnQtde
71 |
72 | from tb_group
73 | order by idCustomer
74 |
75 | ),
76 |
77 | tb_produto_max AS (
78 |
79 | SELECT *
80 | FROM tb_rn
81 | WHERE rnQtde = 1
82 |
83 | )
84 |
85 | SELECT
86 | '{date}' AS dtRef,
87 | t1.*,
88 | t2.NameProduct AS productMaxQtde
89 |
90 | FROM tb_share AS t1
91 |
92 | LEFT JOIN tb_produto_max AS t2
93 | ON t1.idCustomer = t2.idCustomer
94 |
95 |
--------------------------------------------------------------------------------
/src/feature_store/fs_transacoes.sql:
--------------------------------------------------------------------------------
1 | WITH tb_transactions AS (
2 |
3 | SELECT *
4 | FROM transactions
5 | WHERE dtTransaction < '{date}'
6 | AND dtTransaction >= DATE('{date}', '-21 day')
7 |
8 | ),
9 |
10 | tb_freq AS (
11 |
12 | SELECT
13 | idCustomer,
14 | count(distinct date(dtTransaction)) AS qtdeDiasD21,
15 | count(distinct CASE WHEN dtTransaction > date('{date}', '-14 day') THEN date(dtTransaction) END) AS qtdeDiasD14,
16 | count(distinct CASE WHEN dtTransaction > date('{date}', '-7 day') THEN date(dtTransaction) END) AS qtdeDiasD7
17 |
18 | FROM tb_transactions
19 |
20 | GROUP BY idCustomer
21 | ),
22 |
23 | tb_live_minutes AS (
24 |
25 | SELECT idCustomer,
26 | date(datetime(dtTransaction, '-3 hour')) AS dtTransactionDate,
27 | min(datetime(dtTransaction, '-3 hour')) AS dtInicio,
28 | max(datetime(dtTransaction, '-3 hour')) AS dtFim,
29 | (julianday(max(datetime(dtTransaction, '-3 hour'))) -
30 | julianday(min(datetime(dtTransaction, '-3 hour')))) * 24 * 60 AS liveMinutes
31 |
32 | FROM tb_transactions
33 |
34 | GROUP BY 1,2
35 |
36 | ),
37 |
38 | tb_hours AS (
39 |
40 | SELECT idCustomer,
41 | AVG(liveMinutes) AS avgLiveMinutes,
42 | SUM(liveMinutes) AS sumLiveMinutes,
43 | MIN(liveMinutes) AS minLiveMinutes,
44 | MAX(liveMinutes) AS maxLiveMinutes
45 | FROM tb_live_minutes
46 | GROUP BY idCustomer
47 | ),
48 |
49 | tb_vida AS (
50 |
51 | SELECT idCustomer,
52 | COUNT(DISTINCT idTransaction) AS qtdeTransacaoVida,
53 | COUNT(DISTINCT idTransaction) / (max(julianday('{date}') - julianday(dtTransaction))) AS avgTransacaoDia
54 |
55 | FROM transactions
56 | WHERE dtTransaction < '{date}'
57 | GROUP BY idCustomer
58 |
59 | ),
60 |
61 | tb_join AS (
62 |
63 | SELECT t1.*,
64 | t2.avgLiveMinutes,
65 | t2.sumLiveMinutes,
66 | t2.minLiveMinutes,
67 | t2.maxLiveMinutes,
68 | t3.qtdeTransacaoVida,
69 | t3.avgTransacaoDia
70 |
71 | FROM tb_freq AS t1
72 |
73 | LEFT JOIN tb_hours AS t2
74 | ON t1.idCustomer = t2.idCustomer
75 |
76 | LEFT JOIN tb_vida AS t3
77 | ON t3.idCustomer = t1.idCustomer
78 | )
79 |
80 | SELECT
81 | '{date}' AS dtRef,
82 | *
83 |
84 | FROM tb_join
--------------------------------------------------------------------------------
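Each feature-store query above is a template: `exec.sh` (invoked by `pipeline.sh` below) renders every `'{date}'` placeholder before execution. A minimal sketch of that rendering step in Python; the database paths and the use of `str.format` are assumptions for illustration, not taken from the repo:

    # Render a templated feature-store query and materialize the result.
    # Assumed paths: raw data in data/database.db, features in data/feature_store.db.
    import datetime

    import pandas as pd
    import sqlalchemy

    engine_raw = sqlalchemy.create_engine("sqlite:///../../data/database.db")
    engine_fs = sqlalchemy.create_engine("sqlite:///../../data/feature_store.db")

    with open("fs_transacoes.sql") as f:
        template = f.read()

    today = datetime.date.today().strftime("%Y-%m-%d")
    query = template.format(date=today)  # fills every '{date}' placeholder

    df = pd.read_sql(query, engine_raw)
    df.to_sql("fs_transacoes", engine_fs, index=False, if_exists="append")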
/src/pipeline.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 |
4 | cd feature_store
5 | bash exec.sh
6 | cd ../predict
7 | python profile_user.py
--------------------------------------------------------------------------------
/src/predict/etl.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | t1.dtRef,
3 | t1.idCustomer,
4 | t1.recenciaDias,
5 | t1.frequenciaDias,
6 | t1.valorPoints,
7 | t1.idadeBaseDias,
8 | t1.flEmail,
9 | t3.qtdPointsManha,
10 | t3.qtdPointsTarde,
11 | t3.qtdPointsNoite,
12 | t3.pctPointsManha,
13 | t3.pctPointsTarde,
14 | t3.pctPointsNoite,
15 | t3.qtdTransacoesManha,
16 | t3.qtdTransacoesTarde,
17 | t3.qtdTransacoesNoite,
18 | t3.pctTransacoesManha,
19 | t3.pctTransacoesTarde,
20 | t3.pctTransacoesNoite,
21 | t4.saldoPointsD21,
22 | t4.saldoPointsD14,
23 | t4.saldoPointsD7,
24 | t4.pointsAcumuladosD21,
25 | t4.pointsAcumuladosD14,
26 | t4.pointsAcumuladosD7,
27 | t4.pointsResgatadosD21,
28 | t4.pointsResgatadosD14,
29 | t4.pointsResgatadosD7,
30 | t4.saldoPoints,
31 | t4.pointsAcumuladosVida,
32 | t4.pointsResgatadosVida,
33 | t4.pointsPorDia,
34 | t5.qtdeChatMessage,
35 | t5.qtdeListaPresença,
36 | t5.qtdeResgatarPonei,
37 | t5.qtdeTrocaPontos,
38 | t5.qtdePresençaStreak,
39 | t5.qtdeAirflowLover,
40 | t5.qtdeRLover,
41 | t5.pointsChatMessage,
42 | t5.pointsListaPresença,
43 | t5.pointsResgatarPonei,
44 | t5.pointsTrocaPontos,
45 | t5.pointsPresençaStreak,
46 | t5.pointsAirflowLover,
47 | t5.pointsRLover,
48 | t5.pctChatMessage,
49 | t5.pctListaPresença,
50 | t5.pctResgatarPonei,
51 | t5.pctTrocaPontos,
52 | t5.pctPresençaStreak,
53 | t5.pctAirflowLover,
54 | t5.pctRLover,
55 | t5.avgChatLive,
56 | t5.productMaxQtde,
57 | t6.qtdeDiasD21,
58 | t6.qtdeDiasD14,
59 | t6.qtdeDiasD7,
60 | t6.avgLiveMinutes,
61 | t6.sumLiveMinutes,
62 | t6.minLiveMinutes,
63 | t6.maxLiveMinutes,
64 | t6.qtdeTransacaoVida,
65 | t6.avgTransacaoDia
66 |
67 | FROM fs_general AS t1
68 |
69 | LEFT JOIN fs_horario AS t3
70 | ON t1.idCustomer = t3.idCustomer
71 | AND t1.dtRef = t3.dtRef
72 |
73 | LEFT JOIN fs_points AS t4
74 | ON t1.idCustomer = t4.idCustomer
75 | AND t1.dtRef = t4.dtRef
76 |
77 | LEFT JOIN fs_produtos AS t5
78 | ON t1.idCustomer = t5.idCustomer
79 | AND t1.dtRef = t5.dtRef
80 |
81 | LEFT JOIN fs_transacoes AS t6
82 | ON t1.idCustomer = t6.idCustomer
83 | AND t1.dtRef = t6.dtRef
84 |
85 | WHERE t1.dtRef = DATE('now')
--------------------------------------------------------------------------------
/src/predict/predict.py:
--------------------------------------------------------------------------------
1 | # %%
2 | import pandas as pd
3 | import sqlalchemy
4 | from sqlalchemy import exc
5 |
6 | import mlflow
7 | import mlflow.sklearn
8 |
9 | import json
10 |
11 | print("Scrip para execução de modelos iniciado!")
12 |
13 | print("Carregando modelo...")
14 | mlflow.set_tracking_uri("http://192.168.1.100:8081")
15 | model = mlflow.sklearn.load_model("models:/Churn-Teo-Me-Why/production")
16 |
17 | # %%
18 | print("Carregando as features do modelo...")
19 | model_info = mlflow.models.get_model_info("models:/Churn-Teo-Me-Why/production")
20 | features = [i['name'] for i in json.loads(model_info.signature_dict['inputs'])]
21 | features
22 |
23 | # %%
24 | print("Carregando base para score...")
25 | engine = sqlalchemy.create_engine("sqlite:///../../data/feature_store.db")
26 | with open("etl.sql", 'r') as open_file:
27 | query = open_file.read()
28 |
29 | df = pd.read_sql(query, engine)
30 |
31 | # %%
32 | print("Realizando predições...")
33 | pred = model.predict_proba(df[features])
34 | proba_churn = pred[:,1]
35 |
36 | # %%
37 |
38 | print("Persistindo dados...")
39 | df_predict = df[['dtRef', 'idCustomer']].copy()
40 | df_predict['probaChurn'] = proba_churn.copy()
41 | df_predict = (df_predict.sort_values("probaChurn", ascending=False)
42 | .reset_index(drop=True))
43 |
44 | with engine.connect() as con:
45 | state = f"DELETE FROM tb_churn WHERE dtRef = '{df_predict['dtRef'].min()}';"
46 |
47 | try:
48 | state = sqlalchemy.text(state)
49 | con.execute(state)
50 | con.commit()
51 | except exc.OperationalError:
52 | print("Table does not exist yet; nothing to delete...")
53 |
54 | df_predict.to_sql("tb_churn", engine, if_exists='append', index=False)
55 |
56 | print("Fim.")
--------------------------------------------------------------------------------
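Once predict.py has run, tb_churn holds the scored base ordered by churn probability. An illustrative read-back of the latest snapshot's highest-risk customers (table and column names as written by the script above):

    import pandas as pd
    import sqlalchemy

    engine = sqlalchemy.create_engine("sqlite:///../../data/feature_store.db")

    # Top 10 churn risks from the most recent scoring run persisted above.
    top_risk = pd.read_sql(
        """
        SELECT idCustomer, probaChurn
        FROM tb_churn
        WHERE dtRef = (SELECT MAX(dtRef) FROM tb_churn)
        ORDER BY probaChurn DESC
        LIMIT 10
        """,
        engine,
    )
    print(top_risk)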
/src/predict/profile_user.py:
--------------------------------------------------------------------------------
1 | # %%
2 |
3 | import pandas as pd
4 | import sqlalchemy
5 | import datetime
6 |
7 | cluster_recencia = pd.read_pickle("../../models/cluster_recencia.pkl")
8 | cluster_fv = pd.read_pickle("../../models/cluster_fv.pkl")
9 | model_churn = pd.read_pickle("../../models/rf_2024_06_19.pkl")
10 |
11 | # %%
12 |
13 | engine = sqlalchemy.create_engine("sqlite:///../../data/feature_store.db")
14 |
15 | with open("etl.sql", 'r') as open_file:
16 | query = open_file.read()
17 |
18 | df = pd.read_sql(query, engine)
19 |
20 | # %%
21 |
22 | df['prob_churn'] = model_churn['model'].predict_proba(df[model_churn['features']])[:,1]
23 | df['cluster_recencia'] = cluster_recencia['model'].predict(df[cluster_recencia['features']])
24 | df['cluster_fv'] = cluster_fv['model'].predict(df[cluster_fv['features']])
25 |
26 | columns = ['dtRef', 'idCustomer', 'prob_churn','cluster_recencia','cluster_fv']
27 |
28 | df_final = df[columns].copy()
29 | df_final['dtUpdate'] = datetime.datetime.now()
30 |
31 | df_final.to_sql('customer_profile', engine, index=False, if_exists='replace')
32 | # %%
33 |
--------------------------------------------------------------------------------
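profile_user.py replaces customer_profile wholesale on each run, so the table is always a single snapshot. A quick sketch for sanity-checking that output, cross-tabulating the two cluster labels it writes:

    import pandas as pd
    import sqlalchemy

    engine = sqlalchemy.create_engine("sqlite:///../../data/feature_store.db")
    profile = pd.read_sql("SELECT * FROM customer_profile", engine)

    # Customers per (lifecycle cluster, frequency/value cluster) cell.
    print(pd.crosstab(profile["cluster_recencia"], profile["cluster_fv"]))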
/src/rfv/analise_freq_valor.py:
--------------------------------------------------------------------------------
1 | # %%
2 | import pandas as pd
3 | import sqlalchemy
4 | import matplotlib.pyplot as plt
5 | import seaborn as sns
6 |
7 | from sklearn import cluster
8 | from sklearn import tree
9 | from sklearn import preprocessing
10 |
11 | # %%
12 | engine = sqlalchemy.create_engine("sqlite:///../../data/feature_store.db")
13 |
14 | query = '''
15 |
16 | SELECT *
17 | FROM fs_general
18 | WHERE dtRef = (select max(dtRef) FROM fs_general)
19 |
20 | '''
21 |
22 | df = pd.read_sql(query, engine)
23 | df
24 | # %%
25 | plt.figure(dpi=400)
26 | sns.set_theme(style="darkgrid")
27 | sns.scatterplot(
28 | data=df,
29 | x="valorPoints",
30 | y="frequenciaDias",
31 | )
32 |
33 | plt.title("Frequencia vs Valor")
34 | plt.show()
35 | # %%
36 |
37 |
38 | minmax = preprocessing.MinMaxScaler()
39 |
40 | X_trans = minmax.fit_transform(df[['valorPoints','frequenciaDias']])
41 |
42 | # cluster_method = cluster.KMeans(n_clusters=5)
43 | cluster_method = cluster.AgglomerativeClustering(linkage='ward', n_clusters=5)
44 | cluster_method.fit(X_trans)
45 |
46 | df['cluster'] = cluster_method.labels_
47 |
48 | plt.figure(dpi=400)
49 |
50 | for i in df['cluster'].unique():
51 | data = df[df['cluster']==i]
52 | sns.scatterplot(
53 | data=data,
54 | x="valorPoints",
55 | y="frequenciaDias",
56 | )
57 |
58 | plt.hlines(7.5, xmin=0,xmax=3000)
59 | plt.hlines(3.5, xmin=0,xmax=3000)
60 | plt.hlines(10.5, xmin=0,xmax=3000)
61 | plt.vlines(500, ymin=0,ymax=18)
62 | plt.vlines(1500, ymin=0,ymax=18)
63 |
64 | plt.show()
65 | df.groupby("cluster")['idCustomer'].count()
66 |
67 | # %%
68 |
69 | def rf_cluster(row):
70 |
71 | if (row['valorPoints'] < 500):
72 | if (row['frequenciaDias'] < 3.5):
73 | return "01-BB"
74 |
75 | elif (row['frequenciaDias'] < 7.5):
76 | return "02-MB"
77 |
78 | elif (row['frequenciaDias'] < 10.5):
79 | return "03-AB"
80 |
81 | else:
82 | return "04-SB"
83 |
84 | elif (row['valorPoints'] < 1600):
85 | if (row['frequenciaDias'] < 3.5):
86 | return "05-BM"
87 |
88 | elif (row['frequenciaDias'] < 7.5):
89 | return "06-MM"
90 |
91 | elif (row['frequenciaDias'] < 10.5):
92 | return "07-AM"
93 |
94 | else:
95 | return "08-SM"
96 |
97 | else:
98 | if (row['frequenciaDias'] < 3.5):
99 | return "09-BA"
100 |
101 | elif (row['frequenciaDias'] < 7.5):
102 | return "10-MA"
103 |
104 | elif (row['frequenciaDias'] < 10.5):
105 | return "11-AA"
106 |
107 | else:
108 | return "12-SA"
109 |
110 | df['cluster_rf'] = df.apply(rf_cluster, axis=1)
111 |
112 | plt.figure(dpi=400)
113 |
114 | for i in df['cluster_rf'].unique():
115 | data = df[df['cluster_rf']==i]
116 | sns.scatterplot(
117 | data=data,
118 | x="valorPoints",
119 | y="frequenciaDias",
120 | )
121 |
122 | plt.title("Cluster Frequencia vs Valor")
123 | plt.legend(df['cluster_rf'].unique())
124 |
125 | # %%
126 |
127 | clf = tree.DecisionTreeClassifier(random_state=42,
128 | min_samples_leaf=1,
129 | max_depth=None)
130 |
131 | clf.fit(df[['frequenciaDias', 'valorPoints']], df['cluster_rf'])
132 |
133 | model_freq_valor = pd.Series(
134 | {"model": clf,
135 | "features": ['frequenciaDias', 'valorPoints']}
136 | )
137 |
138 | model_freq_valor.to_pickle("../../models/cluster_fv.pkl")
139 |
140 | # %%
141 |
142 |
--------------------------------------------------------------------------------
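The closing step above is a small trick worth naming: the hand-written rf_cluster thresholds are distilled into a DecisionTreeClassifier (min_samples_leaf=1, unlimited depth), so the business rules ship as an ordinary model pickle. A self-contained toy version with made-up thresholds and data, showing the tree reproduces such rules exactly:

    import pandas as pd
    from sklearn import tree

    toy = pd.DataFrame({
        "valorPoints": [100, 700, 2000, 300, 1800],
        "frequenciaDias": [2, 8, 12, 11, 5],
    })

    def rule(row):
        # Hand-written thresholds (invented for the toy)
        value = "low" if row["valorPoints"] < 500 else "high"
        freq = "rare" if row["frequenciaDias"] < 7.5 else "frequent"
        return f"{value}-{freq}"

    toy["label"] = toy.apply(rule, axis=1)

    # An unrestricted tree memorizes axis-aligned thresholds perfectly.
    clf = tree.DecisionTreeClassifier(min_samples_leaf=1, max_depth=None,
                                      random_state=42)
    clf.fit(toy[["valorPoints", "frequenciaDias"]], toy["label"])

    assert (clf.predict(toy[["valorPoints", "frequenciaDias"]]) == toy["label"]).all()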
/src/rfv/analise_recencia.py:
--------------------------------------------------------------------------------
1 | # %%
2 |
3 | import pandas as pd
4 | import sqlalchemy
5 |
6 | from sklearn import tree
7 |
8 | import matplotlib.pyplot as plt
9 |
10 | def ciclo_vida(row):
11 |
12 | if row['idadeBaseDias'] <=7:
13 | return '01-Nova'
14 |
15 | elif row['recenciaDias'] <= 2:
16 | return '02-Super Ativa'
17 |
18 | elif row['recenciaDias'] <= 6:
19 | return '03-Ativa Comum'
20 |
21 | elif row['recenciaDias'] <= 12:
22 | return '04-Ativa Fria'
23 |
24 | elif row['recenciaDias'] <= 18:
25 | return '05-Desiludida'
26 |
27 | else:
28 | return '06-Pre Churn'
29 |
30 | # %%
31 |
32 | # if __name__ == "__main__":
33 |
34 | engine = sqlalchemy.create_engine("sqlite:///../../data/feature_store.db")
35 |
36 | query = '''
37 |
38 | SELECT *
39 | FROM fs_general
40 | WHERE dtRef = (select max(dtRef) FROM fs_general)
41 |
42 | '''
43 |
44 | df = pd.read_sql(query, engine)
45 |
46 | plt.figure(dpi=400)
47 | df["recenciaDias"].hist()
48 | plt.show()
49 |
50 | df_recencia = df[["recenciaDias", 'idadeBaseDias']].sort_values(by="recenciaDias").reset_index(drop=True)
51 | df_recencia["unit"] = 1
52 | df_recencia['Acum'] = df_recencia['unit'].cumsum()
53 | df_recencia["Pct Acum"] = df_recencia['Acum'] / df_recencia['Acum'].max()
54 |
55 | plt.plot(df_recencia["recenciaDias"], df_recencia["Pct Acum"], '-')
56 | plt.grid(True)
57 | plt.title("Dist. Recencia Acumulada")
58 | plt.xlabel("Recencia")
59 | plt.ylabel("Pct Acum.")
60 |
61 | df_recencia['CicloVida'] = df_recencia.apply(ciclo_vida, axis=1)
62 | df_recencia.groupby(by=['CicloVida']).agg({
63 | "recenciaDias":['mean', 'count'],
64 | "idadeBaseDias":['mean'],
65 | })
66 |
67 |
68 | # %%
69 |
70 | clf = tree.DecisionTreeClassifier(min_samples_leaf=1, max_depth=50, random_state=42)
71 | clf.fit(df_recencia[['recenciaDias', 'idadeBaseDias']], df_recencia['CicloVida'])
72 | model = pd.Series(
73 | {
74 | "model":clf,
75 | "features":['recenciaDias', 'idadeBaseDias']
76 | }
77 | )
78 |
79 | model.to_pickle("../../models/cluster_recencia.pkl")
80 | # %%
81 |
--------------------------------------------------------------------------------
/src/train/abt.sql:
--------------------------------------------------------------------------------
1 | WITH tb_fl_churn AS (
2 |
3 | SELECT t1.dtRef,
4 | t1.idCustomer,
5 | CASE WHEN t2.idCustomer IS NULL THEN 1 ELSE 0 END AS flChurn
6 |
7 | FROM fs_general AS t1
8 |
9 | LEFT JOIN fs_general AS t2
10 | ON t1.idCustomer = t2.idCustomer
11 | AND t1.dtRef = date(t2.dtRef, '-21 day')
12 |
13 | WHERE (t1.dtRef < DATE('2024-06-20', '-21 day')
14 | AND strftime('%d', t1.dtRef) = '01')
15 | OR t1.dtRef = DATE('2024-06-20', '-21 day')
16 |
17 | ORDER BY 1, 2
18 |
19 | )
20 |
21 | SELECT t1.*,
22 |
23 | t2.recenciaDias,
24 | t2.frequenciaDias,
25 | t2.valorPoints,
26 | t2.idadeBaseDias,
27 | t2.flEmail,
28 | t3.qtdPointsManha,
29 | t3.qtdPointsTarde,
30 | t3.qtdPointsNoite,
31 | t3.pctPointsManha,
32 | t3.pctPointsTarde,
33 | t3.pctPointsNoite,
34 | t3.qtdTransacoesManha,
35 | t3.qtdTransacoesTarde,
36 | t3.qtdTransacoesNoite,
37 | t3.pctTransacoesManha,
38 | t3.pctTransacoesTarde,
39 | t3.pctTransacoesNoite,
40 | t4.saldoPointsD21,
41 | t4.saldoPointsD14,
42 | t4.saldoPointsD7,
43 | t4.pointsAcumuladosD21,
44 | t4.pointsAcumuladosD14,
45 | t4.pointsAcumuladosD7,
46 | t4.pointsResgatadosD21,
47 | t4.pointsResgatadosD14,
48 | t4.pointsResgatadosD7,
49 | t4.saldoPoints,
50 | t4.pointsAcumuladosVida,
51 | t4.pointsResgatadosVida,
52 | t4.pointsPorDia,
53 | t5.qtdeChatMessage,
54 | t5.qtdeListaPresença,
55 | t5.qtdeResgatarPonei,
56 | t5.qtdeTrocaPontos,
57 | t5.qtdePresençaStreak,
58 | t5.qtdeAirflowLover,
59 | t5.qtdeRLover,
60 | t5.pointsChatMessage,
61 | t5.pointsListaPresença,
62 | t5.pointsResgatarPonei,
63 | t5.pointsTrocaPontos,
64 | t5.pointsPresençaStreak,
65 | t5.pointsAirflowLover,
66 | t5.pointsRLover,
67 | t5.pctChatMessage,
68 | t5.pctListaPresença,
69 | t5.pctResgatarPonei,
70 | t5.pctTrocaPontos,
71 | t5.pctPresençaStreak,
72 | t5.pctAirflowLover,
73 | t5.pctRLover,
74 | t5.avgChatLive,
75 | t5.productMaxQtde,
76 | t6.qtdeDiasD21,
77 | t6.qtdeDiasD14,
78 | t6.qtdeDiasD7,
79 | t6.avgLiveMinutes,
80 | t6.sumLiveMinutes,
81 | t6.minLiveMinutes,
82 | t6.maxLiveMinutes,
83 | t6.qtdeTransacaoVida,
84 | t6.avgTransacaoDia
85 |
86 | FROM tb_fl_churn AS t1
87 |
88 | LEFT JOIN fs_general AS t2
89 | ON t1.idCustomer = t2.idCustomer
90 | AND t1.dtRef = t2.dtRef
91 |
92 | LEFT JOIN fs_horario AS t3
93 | ON t1.idCustomer = t3.idCustomer
94 | AND t1.dtRef = t3.dtRef
95 |
96 | LEFT JOIN fs_points AS t4
97 | ON t1.idCustomer = t4.idCustomer
98 | AND t1.dtRef = t4.dtRef
99 |
100 | LEFT JOIN fs_produtos AS t5
101 | ON t1.idCustomer = t5.idCustomer
102 | AND t1.dtRef = t5.dtRef
103 |
104 | LEFT JOIN fs_transacoes AS t6
105 | ON t1.idCustomer = t6.idCustomer
106 | AND t1.dtRef = t6.dtRef
--------------------------------------------------------------------------------
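The tb_fl_churn CTE defines the label by self-joining fs_general to itself 21 days ahead: a customer with a snapshot at dtRef but none exactly 21 days later gets flChurn = 1. A toy pandas illustration of the same join (made-up ids and dates; the real query's WHERE clause likewise keeps only reference dates whose 21-day window is fully observed):

    import pandas as pd

    fs_general = pd.DataFrame({
        "dtRef": ["2024-05-30", "2024-05-30", "2024-06-20"],
        "idCustomer": ["A", "B", "A"],
    })

    # Shift each snapshot back 21 days so it matches its base row.
    ahead = fs_general.copy()
    ahead["dtRef"] = (pd.to_datetime(ahead["dtRef"])
                      - pd.Timedelta(days=21)).dt.strftime("%Y-%m-%d")

    label = fs_general.merge(ahead, on=["dtRef", "idCustomer"],
                             how="left", indicator=True)
    label["flChurn"] = (label["_merge"] == "left_only").astype(int)

    # Keep only rows whose 21-day window is observed (mirrors the WHERE).
    label = label[label["dtRef"] <= "2024-05-30"]
    print(label[["dtRef", "idCustomer", "flChurn"]])
    # A returns on 2024-06-20 (flChurn = 0); B does not (flChurn = 1).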
/src/train/export_abt.py:
--------------------------------------------------------------------------------
1 | # %%
2 |
3 | import pandas as pd
4 | import sqlalchemy
5 |
6 | from sklearn import model_selection
7 |
8 | engine = sqlalchemy.create_engine("sqlite:///../../data/feature_store.db")
9 |
10 | # %%
11 | with open("abt.sql", 'r') as open_file:
12 | query = open_file.read()
13 |
14 | df = pd.read_sql(query, engine)
15 |
16 | oot = df[df['dtRef'] == df['dtRef'].max()].copy()
17 | df_train = df[df['dtRef'] < df['dtRef'].max()].copy()
18 |
19 | train, test = model_selection.train_test_split(df_train,
20 | random_state=42,
21 | stratify=df_train['flChurn'])
22 |
23 | train['partition_set_name'] = 'train'
24 | test['partition_set_name'] = 'test'
25 | oot['partition_set_name'] = 'oot'
26 |
27 | # %%
28 |
29 | df_full = pd.concat([train, test, oot], axis=0, ignore_index=True)
30 | df_full.to_csv("../../data/abt_churn_20240620.csv", index=False, sep=";")
31 |
--------------------------------------------------------------------------------
/src/train/semma_ex.py:
--------------------------------------------------------------------------------
1 | # %%
2 |
3 | import pandas as pd
4 |
5 | from sklearn import model_selection
6 | from sklearn import ensemble
7 | from sklearn import pipeline
8 | from sklearn import metrics
9 |
10 | from feature_engine import selection
11 | from feature_engine import encoding
12 |
13 | # %%
14 |
15 | ## SAMPLE
16 | df = pd.read_excel("../../data/abt_churn.xlsx")
17 |
18 | target = 'flChurn'
19 | features = df.columns.tolist()[4:]
20 |
21 | # %%
22 | ### DATA PARTITION
23 | df_oot = df[ df['dtRef'] == df['dtRef'].max()]
24 |
25 | df_train = df[ df['dtRef'] < df['dtRef'].max()]
26 |
27 | # %%
28 | ### SAMPLING
29 |
30 | X_train, X_test, y_train, y_test = model_selection.train_test_split(
31 | df_train[features], df_train[target],
32 | train_size=0.8,
33 | random_state=42,
34 | stratify=df_train[target]
35 | )
36 |
37 | print("Taxa de resposta Train:", y_train.mean())
38 | print("Taxa de resposta Test:", y_test.mean())
39 |
40 | # %%
41 | ## EXPLORE
42 | describe = X_train.describe()
43 | na_values = X_train.isna().sum().sort_values()
44 |
45 | df_eda = X_train.copy()  # copy so the EDA target column does not mutate X_train
46 | df_eda[target] = y_train
47 | df_eda.groupby(["flChurn"]).describe().T.head(50)
48 |
49 | # %%
50 | ## MODIFY
51 |
52 | cat_features = X_train.dtypes[X_train.dtypes == 'object'].index.tolist()
53 | X_train[cat_features]
54 |
55 | to_drop = ['pointsPorDia', 'avgChatLive']
56 |
57 | drop = selection.DropFeatures(features_to_drop=to_drop)
58 | onehot = encoding.OneHotEncoder(variables=['productMaxQtde'])
59 |
60 | # %%
61 | # MODEL
62 |
63 | model = ensemble.RandomForestClassifier(random_state=42)
64 |
65 | params = {
66 | "max_depth": [4,5,8,10,15],
67 | "min_samples_leaf": [10,15,20,50,100],
68 | "n_estimators":[100,200,500]
69 | }
70 |
71 | grid = model_selection.GridSearchCV(model,
72 | param_grid=params,
73 | scoring='roc_auc',
74 | cv=3,
75 | n_jobs=10)
76 |
77 | # %%
78 |
79 | model_pipe = pipeline.Pipeline([
80 | ('drop', drop),
81 | ('onehot', onehot),
82 | ('model', grid)
83 | ])
84 |
85 | model_pipe.fit(X_train[features], y_train)
86 |
87 | # %%
88 | ## ASSESS
89 |
90 | train_pred = model_pipe.predict_proba(X_train[features])
91 | test_pred = model_pipe.predict_proba(X_test[features])
92 | oot_pred = model_pipe.predict_proba(df_oot[features])
93 |
94 | auc_train = metrics.roc_auc_score(y_train, train_pred[:,1])
95 | auc_test = metrics.roc_auc_score(y_test, test_pred[:,1])
96 | auc_oot = metrics.roc_auc_score(df_oot[target], oot_pred[:,1])
97 |
98 | print("AUC Score train:", auc_train)
99 | print("AUC Score test:", auc_test)
100 | print("AUC Score oot:", auc_oot)
101 |
102 | metrics_values = {
103 | "train": auc_train,
104 | "test": auc_test,
105 | "oot": auc_oot,
106 | }
107 |
108 | model_export = pd.Series(
109 | {
110 | "model":model_pipe,
111 | "features":features,
112 | "metrics": metrics_values,
113 | }
114 | )
115 |
116 | model_export.to_pickle("../../models/rf_2024_06_19.pkl")
--------------------------------------------------------------------------------
/src/train/train.py:
--------------------------------------------------------------------------------
1 | # %%
2 | import datetime
3 |
4 | import pandas as pd
5 | import sqlalchemy
6 |
7 | from sklearn import ensemble
8 | from sklearn import metrics
9 | from sklearn import model_selection
10 | from sklearn import pipeline
11 |
12 | from feature_engine import encoding
13 |
14 | # %%
15 |
16 | # Open the connection to the feature-store database
17 | engine = sqlalchemy.create_engine("sqlite:///../../data/feature_store.db")
18 |
19 | # Load the ABT query from disk
20 | with open('abt.sql', 'r') as open_file:
21 | query = open_file.read()
22 |
23 | # Run the query and fetch the data
24 | df = pd.read_sql(query, engine)
25 |
26 | df.head()
27 | # %%
28 | ## Split into train and out-of-time (OOT) sets
29 |
30 | df_oot = df[df['dtRef']==df['dtRef'].max()]
31 | df_train = df[df['dtRef'] < df['dtRef'].max()]
98 | def report_metrics(y_true, y_proba, cohort=0.5):
99 |     y_pred = (y_proba[:,1] > cohort).astype(int)
100 |
101 | acc = metrics.accuracy_score(y_true, y_pred)
102 | auc = metrics.roc_auc_score(y_true, y_proba[:,1])
103 | precision = metrics.precision_score(y_true, y_pred)
104 | recall = metrics.recall_score(y_true, y_pred)
105 |
106 | res = {
107 | 'Accuracy': acc,
108 | 'ROC AUC': auc,
109 | "Precision": precision,
110 | "Recall": recall,
111 | }
112 |
113 | return res
114 |
115 | report_train = report_metrics(y_train, y_train_proba)
116 | report_train['base'] = 'Train'
117 |
118 | report_test = report_metrics(y_test, y_test_proba)
119 | report_test['base'] = 'Test'
120 |
121 | report_oot = report_metrics(df_oot[target], y_oot_proba)
122 | report_oot['base'] = 'Oot'
123 |
124 | df_metrics = pd.DataFrame([report_train,report_test,report_oot])
125 | print(df_metrics)
126 |
127 | # %%
128 |
129 | model_series = pd.Series({
130 | "model": model_pipeline,
131 | "features": features,
132 | "metrics": df_metrics,
133 | "dt_train": datetime.datetime.now()
134 | })
135 |
136 | model_series.to_pickle("../../models/rf_teo_fim_curso.pkl")
137 |
--------------------------------------------------------------------------------
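As in the RFV scripts, train.py bundles everything a consumer needs into one pickled Series: fitted pipeline, feature list, metrics table, and training timestamp. A sketch of loading that bundle and re-scoring ABT rows, following the same pattern profile_user.py uses (paths assume the script runs from src/train):

    import pandas as pd
    import sqlalchemy

    bundle = pd.read_pickle("../../models/rf_teo_fim_curso.pkl")
    print(bundle["metrics"])  # train/test/oot metrics saved with the model

    engine = sqlalchemy.create_engine("sqlite:///../../data/feature_store.db")
    with open("abt.sql") as f:
        df = pd.read_sql(f.read(), engine)

    proba = bundle["model"].predict_proba(df[bundle["features"]])[:, 1]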
/src/train/train_mlflow.py:
--------------------------------------------------------------------------------
1 | # %%
2 | import datetime
3 |
4 | import pandas as pd
5 | import sqlalchemy
6 |
7 | import mlflow
8 |
9 | from sklearn import ensemble
10 | from sklearn import metrics
11 | from sklearn import model_selection
12 | from sklearn import pipeline
13 |
14 | from feature_engine import encoding
15 |
16 | # %%
17 |
18 | # Open the connection to the feature-store database
19 | engine = sqlalchemy.create_engine("sqlite:///../../data/feature_store.db")
20 |
21 | # Load the ABT query from disk
22 | with open('abt.sql', 'r') as open_file:
23 | query = open_file.read()
24 |
25 | # Run the query and fetch the data
26 | df = pd.read_sql(query, engine)
27 |
28 | df.head()
29 | # %%
30 | ## Split into train and out-of-time (OOT) sets
31 |
32 | df_oot = df[df['dtRef']==df['dtRef'].max()]
33 | df_train = df[df['dtRef'] < df['dtRef'].max()]
73 | def report_metrics(y_true, y_proba, base, cohort=0.5):
74 |     y_pred = (y_proba[:,1] > cohort).astype(int)
75 |
76 | acc = metrics.accuracy_score(y_true, y_pred)
77 | auc = metrics.roc_auc_score(y_true, y_proba[:,1])
78 | precision = metrics.precision_score(y_true, y_pred)
79 | recall = metrics.recall_score(y_true, y_pred)
80 |
81 | res = {
82 | f'{base} Accuracy': acc,
83 | f'{base} ROC AUC': auc,
84 | f"{base} Precision": precision,
85 | f"{base} Recall": recall,
86 | }
87 |
88 | return res
89 |
90 | with mlflow.start_run():
91 |
92 | onehot = encoding.OneHotEncoder(variables=cat_features,
93 | drop_last=True)
94 |
95 | model = ensemble.GradientBoostingClassifier(random_state=42)
96 |
97 | params = {"learning_rate": [0.01,0.1,0.2,0.5,0.75,0.9,0.99],
98 | "n_estimators": [50,100,200,500],
99 | "subsample": [0.1,0.5,0.9],
100 | "min_samples_leaf":[5,10,25,50,100]
101 | }
102 |
103 | grid = model_selection.GridSearchCV(model,
104 | param_grid=params,
105 | cv=3,
106 | scoring='roc_auc',
107 | n_jobs=-2,
108 | verbose=3)
109 |
110 | model_pipeline = pipeline.Pipeline([
111 | ('One Hot Encode', onehot),
112 | ('Modelo', grid)
113 | ])
114 |
115 | # Fit the model
116 | model_pipeline.fit(X_train, y_train)
117 |
118 | y_train_proba = model_pipeline.predict_proba(X_train)
119 | y_test_proba = model_pipeline.predict_proba(X_test)
120 | y_oot_proba = model_pipeline.predict_proba(df_oot[features])
121 |
122 | report = {}
123 | report.update(report_metrics(y_train, y_train_proba, 'train'))
124 | report.update(report_metrics(y_test, y_test_proba, 'test'))
125 | report.update(report_metrics(df_oot[target], y_oot_proba, 'oot'))
126 |
127 | mlflow.log_metrics(report)
128 |
--------------------------------------------------------------------------------
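Note that the run above logs metrics only, while predict.py loads "models:/Churn-Teo-Me-Why/production" from the registry, so the fitted pipeline still has to be logged and registered at some point. A minimal sketch of that step inside the same mlflow.start_run() block; the artifact path and input example are assumptions, not taken from the repo:

    import mlflow.sklearn

    # Log and register the fitted pipeline so predict.py can load it.
    mlflow.sklearn.log_model(
        model_pipeline,
        artifact_path="model",                     # hypothetical artifact path
        registered_model_name="Churn-Teo-Me-Why",  # name predict.py expects
        input_example=X_train.head(3),             # records the input signature
    )

Promoting the new version to the "production" stage is a separate action, e.g. in the MLflow UI or via MlflowClient.transition_model_version_stage.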