├── .gitignore
├── LICENSE
├── MANIFEST.in
├── README.md
├── examples
├── model_report
│ ├── auto_mining_rules
│ │ ├── combiner_rules_0.png
│ │ ├── combiner_rules_1.png
│ │ └── combiner_rules_2.png
│ ├── bin_plots
│ │ ├── bin_vars_A.png
│ │ ├── bin_vars_B.png
│ │ ├── bin_vars_C.png
│ │ ├── bin_vars_D.png
│ │ └── bin_vars_时间.png
│ └── 决策树组合策略挖掘.xlsx
└── pdtr_samplts.ipynb
├── pdtr
├── __init__.py
├── matplot_chinese.ttf
└── transforme.py
├── requirements.txt
└── setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | .idea/
161 |
162 | # itlubber
163 | *.DS_Store
164 | *.pkl
165 | *.virtual_documents
166 | test.*
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 itlubber
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include pdtr/*.ttf pdtr/*.xlsx
2 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 自动决策树规则挖掘工具包
2 |
3 | 在笔者金融风控的日常工作中,很多时候需要根据数据集内的诸多特征(有很多其他称呼,比如因子、变量、自变量、解释变量等)来挖掘一些有用的规则和组合策略,在保证通过率的基础上尽可能多的拒绝坏客户。面对成千上万的特征,如何从数据集中找到有效的规则和组合策略,一直以来都是金融风控搬砖工的日常工作。 `pdtr` 旨在帮助读者快速从高维数据中提取出有效的规则和组合策略。
4 |
5 | > 仓库地址:https://github.com/itlubber/pdtr
6 | >
7 | > 博文地址:https://itlubber.art/archives/auto-strategy-mining
8 | >
9 | > 微信公共号推文:https://mp.weixin.qq.com/s/8s785MfmVznNgQyy38YnWw
10 | >
11 | > pypi包:https://pypi.org/project/pdtr/
12 |
13 | ## 交流
14 |
15 | | 微信 | 微信公众号 |
16 | | :---: | :----: |
17 | |
|
|
18 | | itlubber | itlubber_art |
19 |
20 |
21 | ## 背景简介
22 |
23 | 金融场景风险大致可以概括为三种:系统性风险、欺诈风险(无还款意愿)、信用风险(无还款能力),而作为一名风控搬砖工,日常工作中有大量的数据挖掘工作,如何从高维数据集中挖掘出行之有效的规则、策略及模型来防范欺诈风险和信用风险是每个搬砖工的基操。本仓库由笔者基于网上开源的一系列相关知识,结合实际工作中遇到的实际需求,整理得到。旨在为诸位仁兄提供一个便捷、高效、赏心悦目的决策树组合策略挖掘报告,及一系列能够实际运用到风险控制上的策略。
24 |
25 | ## 项目结构
26 |
27 | ```bash
28 | pdtr
29 | .
30 | | README.md # 说明文档
31 | | setup.py # 打包发布文件
32 | | LICENSE # 开源协议
33 | | requirements.txt # 项目依赖包
34 | +---examples # 演示样例
35 | | | combine_rules_cache # 缓存文件
36 | | | combine_rules_cache.svg # 缓存文件
37 | | | pdtr_samplts.ipynb # 演示样例程序
38 | | \---model_report # 模型报告输出文件夹
39 | | | 决策树组合策略挖掘.xlsx # 策略挖掘报告
40 | | +---auto_mining_rules # 组合策略可视化存储文件夹
41 | | | combiner_rules_0.png # 决策树可视化图片
42 | | | ......
43 | | \---bin_plots # 简单策略可视化存储文件夹
44 | | bin_vars_A.png # 变量分箱可视化图片
45 | | ......
46 | \---pdtr # PDTR 源码包
47 | template.xlsx # excel 模版文件
48 | excel_writer.py # excel写入公共方法
49 | matplot_chinese.ttf # matplotlib 中文字体
50 | transforme.py # 策略挖掘方法
51 | ```
52 |
53 | ## 环境准备
54 |
55 | ### 创建虚拟环境(可选)
56 |
57 | + 通过`conda`创建虚拟环境
58 |
59 | ```bash
60 | >> conda create -n score python==3.8.13
61 |
62 | Collecting package metadata (current_repodata.json): done
63 | Solving environment: failed with repodata from current_repodata.json, will retry with next repodata source.
64 | Collecting package metadata (repodata.json): done
65 | Solving environment: done
66 |
67 |
68 | ==> WARNING: A newer version of conda exists. <==
69 | current version: 4.10.3
70 | latest version: 23.3.1
71 |
72 | Please update conda by running
73 |
74 | $ conda update -n base -c defaults conda
75 |
76 |
77 |
78 | ## Package Plan ##
79 |
80 | environment location: /Users/lubberit/anaconda3/envs/score
81 |
82 | added / updated specs:
83 | - python==3.8.13
84 |
85 |
86 | The following packages will be downloaded:
87 |
88 | package | build
89 | ---------------------------|-----------------
90 | ca-certificates-2023.01.10 | hecd8cb5_0 121 KB
91 | ncurses-6.4 | hcec6c5f_0 1018 KB
92 | openssl-1.1.1t | hca72f7f_0 3.3 MB
93 | pip-23.0.1 | py38hecd8cb5_0 2.5 MB
94 | python-3.8.13 | hdfd78df_1 10.8 MB
95 | setuptools-66.0.0 | py38hecd8cb5_0 1.2 MB
96 | sqlite-3.41.2 | h6c40b1e_0 1.2 MB
97 | wheel-0.38.4 | py38hecd8cb5_0 65 KB
98 | xz-5.4.2 | h6c40b1e_0 372 KB
99 | ------------------------------------------------------------
100 | Total: 20.5 MB
101 |
102 | The following NEW packages will be INSTALLED:
103 |
104 | ca-certificates pkgs/main/osx-64::ca-certificates-2023.01.10-hecd8cb5_0
105 | libcxx pkgs/main/osx-64::libcxx-14.0.6-h9765a3e_0
106 | libffi pkgs/main/osx-64::libffi-3.3-hb1e8313_2
107 | ncurses pkgs/main/osx-64::ncurses-6.4-hcec6c5f_0
108 | openssl pkgs/main/osx-64::openssl-1.1.1t-hca72f7f_0
109 | pip pkgs/main/osx-64::pip-23.0.1-py38hecd8cb5_0
110 | python pkgs/main/osx-64::python-3.8.13-hdfd78df_1
111 | readline pkgs/main/osx-64::readline-8.2-hca72f7f_0
112 | setuptools pkgs/main/osx-64::setuptools-66.0.0-py38hecd8cb5_0
113 | sqlite pkgs/main/osx-64::sqlite-3.41.2-h6c40b1e_0
114 | tk pkgs/main/osx-64::tk-8.6.12-h5d9f67b_0
115 | wheel pkgs/main/osx-64::wheel-0.38.4-py38hecd8cb5_0
116 | xz pkgs/main/osx-64::xz-5.4.2-h6c40b1e_0
117 | zlib pkgs/main/osx-64::zlib-1.2.13-h4dc903c_0
118 |
119 |
120 | Proceed ([y]/n)? y
121 |
122 |
123 | Downloading and Extracting Packages
124 | sqlite-3.41.2 | 1.2 MB | ################################################################################################### | 100%
125 | wheel-0.38.4 | 65 KB | ################################################################################################### | 100%
126 | openssl-1.1.1t | 3.3 MB | ################################################################################################### | 100%
127 | python-3.8.13 | 10.8 MB | ################################################################################################### | 100%
128 | setuptools-66.0.0 | 1.2 MB | ################################################################################################### | 100%
129 | ncurses-6.4 | 1018 KB | ################################################################################################### | 100%
130 | xz-5.4.2 | 372 KB | ################################################################################################### | 100%
131 | ca-certificates-2023 | 121 KB | ################################################################################################### | 100%
132 | pip-23.0.1 | 2.5 MB | ################################################################################################### | 100%
133 | Preparing transaction: done
134 | Verifying transaction: done
135 | Executing transaction: done
136 | #
137 | # To activate this environment, use
138 | #
139 | # $ conda activate score
140 | #
141 | # To deactivate an active environment, use
142 | #
143 | # $ conda deactivate
144 | ```
145 |
146 | + 通过`pyenv`创建虚拟环境
147 |
148 | ```bash
149 | # 安装环境
150 | >> pyenv install -v 3.8.13
151 | # 启动环境
152 | >> pyenv local 3.8.13
153 | # 卸载环境
154 | >> pyenv uninstall 3.8.13
155 | ```
156 |
157 |
158 | ### 安装项目依赖
159 |
160 | ```bash
161 | >> pip install -r requirements.txt -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
162 |
163 | Looking in indexes: http://mirrors.aliyun.com/pypi/simple/
164 | ......
165 | Installing collected packages: webencodings, six, pytz, colour, zipp, tomli, tinycss2, threadpoolctl, python-dateutil, pyparsing, pycparser, pluggy, pillow, packaging, numpy, kiwisolver, joblib, iniconfig, graphviz, fonttools, exceptiongroup, et-xmlfile, defusedxml, cycler, scipy, pytest, patsy, pandas, openpyxl, importlib-resources, cssselect2, contourpy, cffi, statsmodels, scikit-learn, matplotlib, cairocffi, dtreeviz, category-encoders, CairoSVG
166 | Successfully installed CairoSVG-2.7.0 cairocffi-1.5.1 category-encoders-2.6.0 cffi-1.15.1 colour-0.1.5 contourpy-1.0.7 cssselect2-0.7.0 cycler-0.11.0 defusedxml-0.7.1 dtreeviz-2.2.1 et-xmlfile-1.1.0 exceptiongroup-1.1.1 fonttools-4.39.4 graphviz-0.20.1 importlib-resources-5.12.0 iniconfig-2.0.0 joblib-1.2.0 kiwisolver-1.4.4 matplotlib-3.7.1 numpy-1.22.2 openpyxl-3.0.7 packaging-23.1 pandas-1.5.3 patsy-0.5.3 pillow-9.5.0 pluggy-1.0.0 pycparser-2.21 pyparsing-3.0.9 pytest-7.3.1 python-dateutil-2.8.2 pytz-2023.3 scikit-learn-1.2.2 scipy-1.10.1 six-1.11.0 statsmodels-0.14.0 threadpoolctl-3.1.0 tinycss2-1.2.1 tomli-2.0.1 webencodings-0.5.1 zipp-3.15.0
167 | ```
168 |
169 |
170 | ### `PDTR` 安装
171 |
172 | ```bash
173 | pip install pdtr
174 | ```
175 |
176 | ### 版本介绍
177 |
178 | + `0.1.0`
179 |
180 | 仅包含决策树策略挖掘相关工具
181 |
182 | + `0.1.1`
183 |
184 | 除版本 `0.1.0` 中的决策树挖掘相关工具以外,新增了基于 `toad` 和 `optbinning` 的单变量策略挖掘相关方法
185 |
186 | + `0.1.2`
187 |
188 | 在 `0.1.1` 的基础上增加了部分方法的文档注释
189 |
190 |
191 | ### 运行样例
192 |
193 | + 导入相关依赖
194 |
195 | ```python
196 | import os
197 | import numpy as np
198 | import pandas as pd
199 | from sklearn.model_selection import train_test_split
200 |
201 | try:
202 | from pdtr import ParseDecisionTreeRules
203 | except ModuleNotFoundError:
204 | import sys
205 |
206 | sys.path.append("../")
207 | from pdtr import ParseDecisionTreeRules
208 |
209 | np.random.seed(1)
210 | ```
211 |
212 | + 数据集加载
213 |
214 | ```python
215 | feature_map = {}
216 | n_samples = 10000
217 | ab = np.array(list('ABCDEFG'))
218 |
219 | data = pd.DataFrame({
220 | 'A': np.random.randint(10, size = n_samples),
221 | 'B': ab[np.random.choice(7, n_samples)],
222 | 'C': ab[np.random.choice(2, n_samples)],
223 | 'D': np.random.random(size = n_samples),
224 | 'target': np.random.randint(2, size = n_samples)
225 | })
226 | ```
227 |
228 | + 数据集拆分
229 |
230 | ```python
231 | train, test = train_test_split(data, test_size=0.3, shuffle=data["target"])
232 | ```
233 |
234 | + 决策树自动规则挖掘
235 |
236 | ```python
237 | pdtr_instance = ParseDecisionTreeRules(target="target", max_iter=8, output="model_report/决策树组合策略挖掘.xlsx")
238 | pdtr_instance.fit(train, lift=0., max_depth=2, max_samples=1., verbose=False, max_features="auto")
239 | ```
240 |
241 | + 规则验证
242 |
243 | ```python
244 | all_rules = pdtr_instance.insert_all_rules(test=test)
245 | ```
246 |
247 | + 导出策略挖掘报告
248 |
249 | ```python
250 | pdtr_instance.save()
251 | ```
252 |
253 | + 挖掘报告
254 |
255 | [`examples/决策树组合策略挖掘.xlsx`](https://github.com/itlubber/pdtr/blob/main/examples/model_report/%E5%86%B3%E7%AD%96%E6%A0%91%E7%BB%84%E5%90%88%E7%AD%96%E7%95%A5%E6%8C%96%E6%8E%98.xlsx)
256 |
257 |
258 | ## 参考
259 |
260 | > https://github.com/itlubber/LogisticRegressionPipeline
261 | >
262 | > https://github.com/itlubber/itlubber-excel-writer
263 |
--------------------------------------------------------------------------------
/examples/model_report/auto_mining_rules/combiner_rules_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/itlubber/pdtr/9d60f6cba7fc17473b1c26e199a24e2b5cff1d6b/examples/model_report/auto_mining_rules/combiner_rules_0.png
--------------------------------------------------------------------------------
/examples/model_report/auto_mining_rules/combiner_rules_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/itlubber/pdtr/9d60f6cba7fc17473b1c26e199a24e2b5cff1d6b/examples/model_report/auto_mining_rules/combiner_rules_1.png
--------------------------------------------------------------------------------
/examples/model_report/auto_mining_rules/combiner_rules_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/itlubber/pdtr/9d60f6cba7fc17473b1c26e199a24e2b5cff1d6b/examples/model_report/auto_mining_rules/combiner_rules_2.png
--------------------------------------------------------------------------------
/examples/model_report/bin_plots/bin_vars_A.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/itlubber/pdtr/9d60f6cba7fc17473b1c26e199a24e2b5cff1d6b/examples/model_report/bin_plots/bin_vars_A.png
--------------------------------------------------------------------------------
/examples/model_report/bin_plots/bin_vars_B.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/itlubber/pdtr/9d60f6cba7fc17473b1c26e199a24e2b5cff1d6b/examples/model_report/bin_plots/bin_vars_B.png
--------------------------------------------------------------------------------
/examples/model_report/bin_plots/bin_vars_C.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/itlubber/pdtr/9d60f6cba7fc17473b1c26e199a24e2b5cff1d6b/examples/model_report/bin_plots/bin_vars_C.png
--------------------------------------------------------------------------------
/examples/model_report/bin_plots/bin_vars_D.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/itlubber/pdtr/9d60f6cba7fc17473b1c26e199a24e2b5cff1d6b/examples/model_report/bin_plots/bin_vars_D.png
--------------------------------------------------------------------------------
/examples/model_report/bin_plots/bin_vars_时间.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/itlubber/pdtr/9d60f6cba7fc17473b1c26e199a24e2b5cff1d6b/examples/model_report/bin_plots/bin_vars_时间.png
--------------------------------------------------------------------------------
/examples/model_report/决策树组合策略挖掘.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/itlubber/pdtr/9d60f6cba7fc17473b1c26e199a24e2b5cff1d6b/examples/model_report/决策树组合策略挖掘.xlsx
--------------------------------------------------------------------------------
/pdtr/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""Public entry point of the ``pdtr`` package.

Exposes :class:`ParseDecisionTreeRules`, the decision-tree based rule
mining toolkit implemented in :mod:`pdtr.transforme`.

@Time : 2023/5/15 17:55
@Author : itlubber
@Site : itlubber.art
"""

from .transforme import ParseDecisionTreeRules


# Package version; bump on release.
__version__ = "0.1.5"
# Explicit public API of the package.
__all__ = ["ParseDecisionTreeRules"]
13 |
--------------------------------------------------------------------------------
/pdtr/matplot_chinese.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/itlubber/pdtr/9d60f6cba7fc17473b1c26e199a24e2b5cff1d6b/pdtr/matplot_chinese.ttf
--------------------------------------------------------------------------------
/pdtr/transforme.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | @Time : 2023/5/15 17:55
4 | @Author : itlubber
5 | @Site : itlubber.art
6 | """
7 |
8 | import warnings
9 | import os
10 | import re
11 | import graphviz
12 | import dtreeviz
13 | import numpy as np
14 | import pandas as pd
15 | import matplotlib.pyplot as plt
16 | from matplotlib import font_manager
17 | from openpyxl.worksheet.worksheet import Worksheet
18 |
19 | import category_encoders as ce
20 | from optbinning import OptimalBinning
21 | from sklearn.tree import DecisionTreeClassifier
22 | from scorecardpipeline import ExcelWriter, Combiner, feature_bin_stats, bin_plot, dataframe2excel
23 |
24 |
25 | class ParseDecisionTreeRules:
26 |
27 | def __init__(self, target="target", labels=["positive", "negative"], feature_map={}, nan=-1., max_iter=128, writer=None, combiner=None, seed=None, theme_color="2639E9"):
28 | """决策树自动规则挖掘工具包
29 |
30 | :param target: 数据集中好坏样本标签列名称,默认 target
31 | :param labels: 好坏样本标签名称,传入一个长度为2的列表,第0个元素为好样本标签,第1个元素为坏样本标签,默认 ["positive", "negative"]
32 | :param feature_map: 变量名称及其含义,在后续输出报告和策略信息时增加可读性,默认 {}
33 | :param nan: 在决策树策略挖掘时,默认空值填充的值,默认 -1
34 | :param max_iter: 最多支持在数据集上训练多少颗树模型,每次生成一棵树后,会剔除特征重要性最高的特征后,再生成树,默认 128
35 | :param output: excel 挖掘报告保存的路径, 默认 model_report/决策树组合策略挖掘.xlsx
36 | :param writer: 在之前程序运行时生成的 ExcelWriter,可以支持传入一个已有的writer,后续所有内容将保存至该workbook中,默认 None
37 | :param combiner: 可以传入提前训练好的 combiner,支持 toad.transform.Combiner 和 笔者重写的 Combiner
38 | """
39 | self.seed = seed
40 | self.nan = nan
41 | self.target = target
42 | self.labels = labels
43 | self.theme_color = theme_color
44 | self.feature_map = feature_map
45 | self.decision_trees = []
46 | self.max_iter = max_iter
47 | if combiner:
48 | self.combiner = combiner
49 | else:
50 | self.combiner = Combiner()
51 | self.target_enc = None
52 | self.feature_names = None
53 | self.dt_rules = pd.DataFrame()
54 | self.end_row = 2
55 | self.start_col = 2
56 | self.describe_columns = ["组合策略", "命中数", "命中率", "好样本数", "好样本占比", "坏样本数", "坏样本占比", "坏率", "样本整体坏率", "LIFT值"]
57 |
58 | self.init_setting()
59 |
60 | if writer:
61 | self.writer = writer
62 | else:
63 | self.writer = ExcelWriter(theme_color=self.theme_color)
64 |
65 | @staticmethod
66 | def init_setting(font_path=None):
67 | if "seaborn-ticks" in plt.style.available:
68 | plt.style.use('seaborn-ticks')
69 | else:
70 | plt.style.use('seaborn-v0_8-ticks')
71 |
72 | font_path = font_path or os.path.join(os.path.dirname(os.path.abspath(__file__)), 'matplot_chinese.ttf')
73 | if not os.path.isfile(font_path):
74 | import wget
75 | font_path = wget.download("https://itlubber.art/upload/matplot_chinese.ttf", os.path.join(os.path.dirname(os.path.abspath(__file__)), 'matplot_chinese.ttf'))
76 |
77 | font_manager.fontManager.addfont(font_path)
78 | plt.rcParams['font.family'] = font_manager.FontProperties(fname=font_path).get_name()
79 | plt.rcParams['axes.unicode_minus'] = False
80 |
81 | def encode_cat_features(self, X, y):
82 | cat_features = list(set(X.select_dtypes(include=[object, pd.CategoricalDtype]).columns))
83 | cat_features_index = [i for i, f in enumerate(X.columns) if f in cat_features]
84 |
85 | if len(cat_features) > 0:
86 | if self.target_enc is None:
87 | self.target_enc = ce.TargetEncoder(cols=cat_features)
88 | self.target_enc.fit(X[cat_features], y)
89 | self.target_enc.target_mapping = {}
90 | X_TE = X.join(self.target_enc.transform(X[cat_features]).add_suffix('_target'))
91 | for col in cat_features:
92 | mapping = X_TE[[col, f"{col}_target"]].drop_duplicates()
93 | self.target_enc.target_mapping[col] = dict(zip(mapping[col], mapping[f"{col}_target"]))
94 | else:
95 | X_TE = X.join(self.target_enc.transform(X[cat_features]).add_suffix('_target'))
96 |
97 | X_TE = X_TE.drop(columns=cat_features)
98 | return X_TE.rename(columns={f"{c}_target": c for c in cat_features})
99 | else:
100 | return X
101 |
102 | def get_dt_rules(self, tree, feature_names, total_bad_rate, total_count):
103 | tree_ = tree.tree_
104 | left = tree.tree_.children_left
105 | right = tree.tree_.children_right
106 | feature_name = [feature_names[i] if i != -2 else "undefined!" for i in tree_.feature]
107 | rules = dict()
108 |
109 | global res_df
110 | res_df = pd.DataFrame()
111 |
112 | def recurse(node, depth, parent): # 搜每个节点的规则
113 |
114 | if tree_.feature[node] != -2: # 非叶子节点,搜索每个节点的规则
115 | name = feature_name[node]
116 | thd = np.round(tree_.threshold[node], 3)
117 | s = "{} <= {} ".format(name, thd, node)
118 | # 左子
119 | if node == 0:
120 | rules[node] = s
121 | else:
122 | rules[node] = rules[parent] + ' & ' + s
123 | recurse(left[node], depth + 1, node)
124 | s = "{} > {}".format(name, thd)
125 | # 右子
126 | if node == 0:
127 | rules[node] = s
128 | else:
129 | rules[node] = rules[parent] + ' & ' + s
130 | recurse(right[node], depth + 1, node)
131 | else:
132 | df = pd.DataFrame()
133 | df['组合策略'] = rules[parent],
134 | df['好样本数'] = tree_.value[node][0][0].astype(int)
135 | df['好样本占比'] = df['好样本数'] / (total_count * (1 - total_bad_rate))
136 | df['坏样本数'] = tree_.value[node][0][1].astype(int)
137 | df['坏样本占比'] = df['坏样本数'] / (total_count * total_bad_rate)
138 | df['命中数'] = df['好样本数'] + df['坏样本数']
139 | df['命中率'] = df['命中数'] / total_count
140 | df['坏率'] = df['坏样本数'] / df['命中数']
141 | df['样本整体坏率'] = total_bad_rate
142 | df['LIFT值'] = df['坏率'] / df['样本整体坏率']
143 |
144 | global res_df
145 |
146 | res_df = pd.concat([res_df, df], axis=0)
147 |
148 | recurse(0, 1, 0)
149 |
150 | return res_df.sort_values("LIFT值", ascending=True)[self.describe_columns].reset_index(drop=True)
151 |
152 | def select_dt_rules(self, decision_tree, x, y, lift=0., max_samples=1., labels=["positive", "negative"], save=None, verbose=False, drop=False):
153 | rules = self.get_dt_rules(decision_tree, x.columns, sum(y) / len(y), len(y))
154 | total_rules = len(rules)
155 |
156 | try:
157 | viz_model = dtreeviz.model(decision_tree,
158 | X_train=x,
159 | y_train=y,
160 | feature_names=x.columns,
161 | target_name=self.target,
162 | class_names=labels,
163 | )
164 | except AttributeError:
165 | raise "请检查 dtreeviz 版本"
166 |
167 | rules = rules.query(f"LIFT值 >= {lift} & 命中率 <= {max_samples}").reset_index(drop=True)
168 |
169 | if len(rules) > 0:
170 | font_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'matplot_chinese.ttf')
171 | font_manager.fontManager.addfont(font_path)
172 | plt.rcParams['font.family'] = font_manager.FontProperties(fname=font_path).get_name()
173 | plt.rcParams['axes.unicode_minus'] = False
174 |
175 | decision_tree_viz = viz_model.view(
176 | scale=1.5,
177 | orientation='LR',
178 | colors={
179 | "classes": [None, None, ["#2639E9", "#F76E6C"], ["#2639E9", "#F76E6C", "#FE7715", "#FFFFFF"]],
180 | "arrow": "#2639E9",
181 | 'text_wedge': "#F76E6C",
182 | "pie": "#2639E9",
183 | "tile_alpha": 1,
184 | "legend_edge": "#FFFFFF",
185 | },
186 | ticks_fontsize=10,
187 | label_fontsize=10,
188 | fontname=plt.rcParams['font.family'],
189 | )
190 | if verbose:
191 | from IPython.core.display_functions import display
192 | if self.feature_map is not None and len(self.feature_map) > 0:
193 | display(rules.replace(self.feature_map, regex=True))
194 | else:
195 | display(rules)
196 | display(decision_tree_viz)
197 | if save:
198 | if os.path.dirname(save) and not os.path.exists(os.path.dirname(save)):
199 | os.makedirs(os.path.dirname(save))
200 |
201 | try:
202 | decision_tree_viz.save("combine_rules_cache.svg")
203 | except graphviz.backend.execute.ExecutableNotFound:
204 | print("请确保您已安装 graphviz 程序并且正确配置了 PATH 路径。可参考: https://stackoverflow.com/questions/35064304/runtimeerror-make-sure-the-graphviz-executables-are-on-your-systems-path-aft")
205 |
206 | try:
207 | import cairosvg
208 | cairosvg.svg2png(url="combine_rules_cache.svg", write_to=save, dpi=240)
209 | except:
210 | from reportlab.graphics import renderPDF
211 | from svglib.svglib import svg2rlg
212 | drawing = svg2rlg("combine_rules_cache.svg")
213 | renderPDF.drawToFile(drawing, save, dpi=240, fmt="PNG")
214 |
215 | if os.path.isfile("combine_rules_cache.svg"):
216 | os.remove("combine_rules_cache.svg")
217 |
218 | if os.path.isfile("combine_rules_cache"):
219 | os.remove("combine_rules_cache")
220 |
221 | if drop:
222 | if len(rules) > 0:
223 | return rules, decision_tree.feature_names_in_[list(decision_tree.feature_importances_).index(max(decision_tree.feature_importances_))], total_rules
224 | else:
225 | return rules, decision_tree.feature_names_in_[list(decision_tree.feature_importances_).index(min(decision_tree.feature_importances_))], total_rules
226 | else:
227 | return rules, total_rules
228 |
229 | def query_dt_rules(self, x, y, parsed_rules=None):
230 | total_count = len(y)
231 | total_bad_rate = y.sum() / len(y)
232 |
233 | rules = pd.DataFrame()
234 |
235 | if isinstance(parsed_rules, pd.DataFrame):
236 | parsed_rules = parsed_rules["组合策略"].unique()
237 |
238 | for rule in parsed_rules:
239 | select_index = x.query(rule).index
240 | if len(select_index) > 0:
241 | y_select = y[select_index]
242 | df = pd.Series()
243 | df['组合策略'] = rule
244 | df['好样本数'] = len(y_select) - y_select.sum()
245 | df['好样本占比'] = df['好样本数'] / (total_count * (1 - total_bad_rate))
246 | df['坏样本数'] = y_select.sum()
247 | df['坏样本占比'] = df['坏样本数'] / (total_count * total_bad_rate)
248 | df['命中数'] = df['好样本数'] + df['坏样本数']
249 | df['命中率'] = df['命中数'] / total_count
250 | df['坏率'] = df['坏样本数'] / df['命中数']
251 | df['样本整体坏率'] = total_bad_rate
252 | df['LIFT值'] = df['坏率'] / df['样本整体坏率']
253 | else:
254 | df = pd.Series({'组合策略': rule, '好样本数': 0, '好样本占比': 0., '坏样本数': 0, '坏样本占比': 0., '命中数': 0, '命中率': 0., '坏率': 0., '样本整体坏率': total_bad_rate, 'LIFT值': 0., })
255 |
256 | rules = pd.concat([rules, pd.DataFrame(df).T]).reset_index(drop=True)
257 |
258 | return rules[self.describe_columns]
259 |
260 | def insert_dt_rules(self, parsed_rules, end_row, start_col, save=None, sheet=None, figsize=(500, 350)):
261 | if isinstance(sheet, Worksheet):
262 | worksheet = sheet
263 | else:
264 | worksheet = self.writer.get_sheet_by_name(sheet or "决策树组合策略挖掘")
265 |
266 | end_row, end_col = dataframe2excel(parsed_rules, self.writer, sheet_name=worksheet, start_row=end_row + 1, start_col=start_col, percent_cols=['好样本占比', '坏样本占比', '命中率', '坏率', '样本整体坏率', 'LIFT值'], condition_cols=["坏率", "LIFT值"])
267 |
268 | if save is not None:
269 | end_row, end_col = self.writer.insert_pic2sheet(worksheet, save, (end_row + 1, start_col), figsize=figsize)
270 |
271 | return end_row, end_col
272 |
273 | def fit(self, x, y=None, max_depth=2, lift=0., max_samples=1., min_score=None, verbose=False, *args, **kwargs):
274 | """组合策略挖掘
275 |
276 | :param x: 包含标签的数据集
277 | :param max_depth: 决策树最大深度,即最多组合的特征个数,默认 2
278 | :param lift: 组合策略最小的lift值,默认 0.,即全部组合策略
279 | :param max_samples: 每条组合策略的最大样本占比,默认 1.0,即全部组合策略
280 | :param min_score: 决策树拟合时最小的auc,如果不满足则停止后续生成决策树
281 | :param verbose: 是否调试模式,仅在 jupyter 环境有效
282 | :param kwargs: DecisionTreeClassifier 参数
283 | """
284 | worksheet = self.writer.get_sheet_by_name("策略详情")
285 |
286 | y = x[self.target]
287 | X_TE = self.encode_cat_features(x.drop(columns=[self.target]), y)
288 | X_TE = X_TE.fillna(self.nan)
289 |
290 | self.feature_names = list(X_TE.columns)
291 |
292 | for i in range(self.max_iter):
293 | decision_tree = DecisionTreeClassifier(max_depth=max_depth, *args, **kwargs)
294 | decision_tree = decision_tree.fit(X_TE, y)
295 |
296 | if (min_score is not None and decision_tree.score(X_TE, y) < min_score) or len(X_TE.columns) < max_depth:
297 | break
298 |
299 | try:
300 | parsed_rules, remove, total_rules = self.select_dt_rules(decision_tree, X_TE, y, lift=lift, max_samples=max_samples, labels=self.labels, verbose=verbose, save=f"model_report/auto_mining_rules/combiner_rules_{i}.png", drop=True)
301 |
302 | if len(parsed_rules) > 0:
303 | self.dt_rules = pd.concat([self.dt_rules, parsed_rules]).reset_index(drop=True)
304 |
305 | if self.writer is not None:
306 | if self.feature_map is not None and len(self.feature_map) > 0:
307 | parsed_rules["组合策略"] = parsed_rules["组合策略"].replace(self.feature_map, regex=True)
308 | self.end_row, _ = self.insert_dt_rules(parsed_rules, self.end_row, self.start_col, save=f"model_report/auto_mining_rules/combiner_rules_{i}.png", figsize=(500, 100 * total_rules), sheet=worksheet)
309 |
310 | X_TE = X_TE.drop(columns=remove)
311 | self.decision_trees.append(decision_tree)
312 | except:
313 | import traceback
314 | traceback.print_exc()
315 |
316 | if len(self.dt_rules) <= 0:
317 | print(f"未挖掘到有效策略, 可以考虑适当调整预设的筛选参数, 降低 lift / 提高 max_samples, 当前筛选标准为: 提取 lift >= {lift} 且 max_samples <= {max_samples} 的策略")
318 |
319 | return self
320 |
321 | def transform(self, x, y=None):
322 | y = x[self.target]
323 | X_TE = self.encode_cat_features(x.drop(columns=[self.target]), y)
324 | X_TE = X_TE.fillna(self.nan)
325 | if self.dt_rules is not None and len(self.dt_rules) > 0:
326 | parsed_rules = self.query_dt_rules(X_TE, y, parsed_rules=self.dt_rules)
327 | if self.feature_map is not None and len(self.feature_map) > 0:
328 | parsed_rules["组合策略"] = parsed_rules["组合策略"].replace(self.feature_map, regex=True)
329 | return parsed_rules
330 | else:
331 | return pd.DataFrame(columns=self.describe_columns)
332 |
    def insert_all_rules(self, val=None, test=None, sheet="策略汇总"):
        """Insert the mined rule-combination tables into the excel report.

        :param val: optional validation dataset, scored via :meth:`transform`
        :param test: optional test dataset, scored via :meth:`transform`
        :param sheet: target sheet name; falls back to "决策树组合策略挖掘" when falsy

        :return: tuple of rule-hit DataFrames, one per provided dataset
            (train first, then val/test in that order)
        """
        worksheet = self.writer.get_sheet_by_name(sheet or "决策树组合策略挖掘")

        # Move the summary sheet to the end of the workbook when a named
        # sheet is used.
        if sheet:
            self.writer.workbook.move_sheet(sheet, -1)

        # Work on a copy so the stored rules keep their technical names.
        parsed_rules_train = self.dt_rules.copy()
        if self.feature_map is not None and len(self.feature_map) > 0:
            parsed_rules_train["组合策略"] = parsed_rules_train["组合策略"].replace(self.feature_map, regex=True)
        # When writing into a dedicated sheet start at row 2, otherwise
        # continue below the content written so far (self.end_row).
        self.end_row, _ = self.writer.insert_value2sheet(worksheet, (2 if sheet else self.end_row + 2, self.start_col), value="训练集决策树组合策略", style="header_middle")
        self.end_row, _ = self.insert_dt_rules(parsed_rules_train, self.end_row, self.start_col, sheet=worksheet)
        outputs = (parsed_rules_train,)

        if len(parsed_rules_train) > 0:
            if val is not None:
                parsed_rules_val = self.transform(val)
                self.end_row, _ = self.writer.insert_value2sheet(worksheet, (self.end_row + 2, self.start_col), value="验证集决策树组合策略", style="header_middle")
                self.end_row, _ = self.insert_dt_rules(parsed_rules_val, self.end_row, self.start_col, sheet=worksheet)
                outputs = outputs + (parsed_rules_val,)

            if test is not None:
                parsed_rules_test = self.transform(test)
                self.end_row, _ = self.writer.insert_value2sheet(worksheet, (self.end_row + 2, self.start_col), value="测试集决策树组合策略", style="header_middle")
                self.end_row, _ = self.insert_dt_rules(parsed_rules_test, self.end_row, self.start_col, sheet=worksheet)
                outputs = outputs + (parsed_rules_test,)
        else:
            # No rules were mined: keep the output arity consistent by
            # repeating the (empty) train table for each requested dataset.
            if val is not None:
                outputs = outputs + (parsed_rules_train,)

            if test is not None:
                outputs = outputs + (parsed_rules_train,)

        return outputs
373 |
374 | def query_feature_rule(self, data, feature, desc="", plot=False, figsize=(10, 6), save=None, *args, **kwargs):
375 | """传入数据集和其中一个特征名称,输出简单策略挖掘统计信息
376 |
377 | :param data: 数据集
378 | :param feature: 特征名称
379 | :param desc: 特征中文含义或其他相关信息
380 | :param bin_plot: 是否可视化特征分箱图
381 | :param figsize: 图像的尺寸
382 | :param save: 图像保存的路径
383 |
384 | :return: pd.DataFrame, 特征分箱的统计信息
385 | """
386 | feature_table = feature_bin_stats(data, feature, desc=desc, *args, **kwargs)
387 |
388 | if plot:
389 | self.bin_plot(feature_table, desc=desc, figsize=figsize, save=save)
390 |
391 | return feature_table
392 |
    @staticmethod
    def bin_plot(*args, **kwargs):
        # Thin delegate to the module-level ``bin_plot`` helper so callers
        # can render bin plots through the class without importing it.
        return bin_plot(*args, **kwargs)
396 |
    def save(self, output="model_report/决策树组合策略挖掘.xlsx"):
        """Persist the accumulated excel report to ``output``."""
        self.writer.save(output)
399 |
400 |
if __name__ == '__main__':
    import numpy as np
    import pandas as pd
    from sklearn.model_selection import train_test_split

    # Build a random demo dataset: integer, categorical and continuous
    # features plus a binary target.
    feature_map = {}
    n_samples = 10000
    ab = np.array(list('ABCDEFG'))

    data = pd.DataFrame({
        'A': np.random.randint(10, size=n_samples),
        'B': ab[np.random.choice(7, n_samples)],
        'C': ab[np.random.choice(2, n_samples)],
        'D': np.random.random(size=n_samples),
        'target': np.random.randint(2, size=n_samples)
    })

    # BUG FIX: ``shuffle`` expects a bool; passing the label series here was
    # clearly intended for ``stratify`` to keep class ratios equal across
    # the split.
    train, test = train_test_split(data, test_size=0.3, shuffle=True, stratify=data["target"])

    pdtr = ParseDecisionTreeRules(target="target", feature_map=feature_map, max_iter=8)
    # ``max_features="auto"`` was removed from DecisionTreeClassifier in
    # scikit-learn 1.3; ``"sqrt"`` is its historical equivalent.
    pdtr.fit(train, lift=1., max_depth=2, max_samples=0.5, verbose=False, min_samples_split=8, min_samples_leaf=5, max_features="sqrt")
    pdtr.insert_all_rules(test=test)
    pdtr.save()
424 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | wget
2 | scorecardpipeline
3 | category-encoders>=2.6.0
4 | statsmodels<0.14,>=0.13.2
5 | CairoSVG>=2.7.0
6 | graphviz>=0.20.1
7 | dtreeviz>=2.2.1
8 | reportlab
9 | svglib
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | from setuptools import setup, find_packages, Extension
4 |
5 |
NAME = 'pdtr'


def get_version():
    """Read ``__version__`` out of the package's ``__init__.py``."""
    with open(f"{NAME}/__init__.py", "r", encoding="utf8") as init_file:
        contents = init_file.read()
    return re.search(r'__version__ = "(.*?)"', contents).group(1)
13 |
def get_requirements(stage = None):
    """Parse a pip requirements file into a list of requirement strings.

    :param stage: optional suffix; e.g. ``"dev"`` reads ``requirements-dev.txt``,
        ``None`` reads ``requirements.txt``

    :return: list of requirement lines with blanks, pip options and comments removed
    """
    file_name = 'requirements'

    if stage is not None:
        file_name = f"{file_name}-{stage}"

    requirements = []
    with open(f"{file_name}.txt", 'r') as f:
        for line in f:
            line = line.strip()
            # Skip blank lines, pip options (-r/-e/--index-url ...) and
            # comment lines — previously '#' comments leaked into the
            # install_requires list.
            if not line or line.startswith(('-', '#')):
                continue

            requirements.append(line)

    return requirements
30 |
31 |
# Package metadata. Version and dependencies are derived from the source
# tree (pdtr/__init__.py and requirements.txt) so they stay in sync.
setup(
    name = NAME,
    version = get_version(),
    description = '自动决策树规则挖掘工具包',
    long_description = open('README.md', encoding = 'utf-8').read(),
    long_description_content_type = 'text/markdown',
    url = 'https://github.com/itlubber/pdtr',
    author = 'itlubber',
    author_email = 'itlubber@qq.com',
    packages = find_packages(),
    # Ship non-python assets (e.g. matplot_chinese.ttf) listed in MANIFEST.in.
    include_package_data = True,
    python_requires = '>=3.6',
    install_requires = get_requirements(),
    license = 'MIT',
    classifiers = [
        'Operating System :: POSIX',
        'Operating System :: Microsoft :: Windows',
        'Operating System :: MacOS :: MacOS X',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
    ],
)
--------------------------------------------------------------------------------