├── .gitignore ├── LICENSE ├── data ├── sh000001.csv ├── sh000001.day ├── sh000001_10.csv ├── sh000001_10.png ├── sh000001_2._csv ├── sh000001_3.csv └── sh000001_3.png ├── db ├── __init__.py └── db.md ├── demo.py ├── deploy ├── __init__.py └── installguide.txt ├── encrpty ├── __init__.py └── readme.md ├── featselection ├── __init__.py ├── extratreesclassifier.py ├── linearsvc.py ├── mutualinfoclassif.py ├── pipeline.py ├── rfe.py ├── selectpercentile.py └── variancemutualinfo.py ├── log └── attention_matrix.png ├── model.py ├── predict.png ├── predict2.png ├── profiling ├── __init__.py └── stats.py ├── readme.txt ├── requirements.txt ├── server.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /data/sh000001.day: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackyin68/pyprofiling/3f4180c735b80b978028cad776d30a6b99af1547/data/sh000001.day -------------------------------------------------------------------------------- /data/sh000001_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackyin68/pyprofiling/3f4180c735b80b978028cad776d30a6b99af1547/data/sh000001_10.png -------------------------------------------------------------------------------- /data/sh000001_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackyin68/pyprofiling/3f4180c735b80b978028cad776d30a6b99af1547/data/sh000001_3.png -------------------------------------------------------------------------------- /db/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackyin68/pyprofiling/3f4180c735b80b978028cad776d30a6b99af1547/db/__init__.py -------------------------------------------------------------------------------- /db/db.md: -------------------------------------------------------------------------------- 1 | 1.首先下载安装包: 2 | 3 | https://www.mysql.com/cn/downloads/ 4 | https://downloads.mysql.com/archives/get/p/25/file/mysql-installer-community-5.7.35.0.msi 5 | 6 | 7 | 2.安装Mysql 8 | 9 | https://download.visualstudio.microsoft.com/download/pr/85d47aa9-69ae-4162-8300-e6b7e4bf3cf3/52B196BBE9016488C735E7B41805B651261FFA5D7AA86EB6A1D0095BE83687B2/VC_redist.x64.exe 10 | 11 | 3.安装SQLyog 12 | 13 | https://en.softonic.com/download/sqlyog/windows/post-download 14 | regedit [HKEY_CURRENT_USER]-[Software]第一个很长的注册项删除,可以重复使用 15 | [HKEY_CURRENT_USER]-[Software]-{FCE28CE8-D8CE-4637-9BC7-93E4C0D407FA}] 16 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackyin68/pyprofiling/3f4180c735b80b978028cad776d30a6b99af1547/demo.py -------------------------------------------------------------------------------- /deploy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackyin68/pyprofiling/3f4180c735b80b978028cad776d30a6b99af1547/deploy/__init__.py -------------------------------------------------------------------------------- /deploy/installguide.txt: -------------------------------------------------------------------------------- 1 | install module 2 | pip install -r requirements.txt 3 | 4 | apipost 5 | https://www.apipost.cn/download.html 6 | 7 | docker 8 | https://www.docker.com/products/docker-desktop 9 | 10 | upgrade 11 | python -m pip install --force-reinstall pip 12 | python -m pip install 
--force-reinstall setuptools 13 | 14 | -------------------------------------------------------------------------------- /encrpty/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackyin68/pyprofiling/3f4180c735b80b978028cad776d30a6b99af1547/encrpty/__init__.py -------------------------------------------------------------------------------- /encrpty/readme.md: -------------------------------------------------------------------------------- 1 | pip install Cython 2 | Install visual studio 2019 3 | MinGW Cygwin 4 | 5 | python -m compileall 6 | 7 | 删除 目录下所有 .py 文件就可以打包发布。 8 | 9 | find . -name '*.py' -type f -print -exec rm {} \; 10 | 11 | https://gitforwindows.org/ 12 | 13 | https://tortoisegit.org/download/ 14 | 15 | python -O -m compileall . 16 | find . -name '*.pyc' -exec rename 's/.cpython-35.opt-1//' {} \; 17 | find . -name '*.pyc' -execdir mv {} .. \; 18 | find . -name '*.py' -type f -print -exec rm {} \; 19 | find . -name '__pycache__' -exec rmdir {} \; 20 | zip -r ../$1.zip ./* 21 | 22 | python -O -m compileall . 23 | find . -name "*.pyc" -exec rename ".cpython-39.opt-1" "" {} \; 24 | find . -name '*.pyc' -execdir mv {} .. \; 25 | find . -name '*.py' -type f -print -exec rm {} \; 26 | find . 
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import make_classification
from sklearn.ensemble import ExtraTreesClassifier

# Build a synthetic binary classification task with 10 features, of which
# only 3 are informative. shuffle=False is passed so the generated columns
# are not permuted.
X, y = make_classification(n_samples=1000,
                           n_features=10,
                           n_informative=3,
                           n_redundant=0,
                           n_repeated=0,
                           n_classes=2,
                           random_state=0,
                           shuffle=False)

# Build a forest and compute the feature importances
forest = ExtraTreesClassifier(n_estimators=250, random_state=0)
forest.fit(X, y)
importances = forest.feature_importances_
# Feature indices ordered from most to least important.
indices = np.argsort(importances)[::-1]

# Plot the importances in descending order. The bar positions are derived
# from the data (X.shape[1]) instead of a hard-coded 10 so the plot stays
# correct if n_features above is changed.
plt.bar(range(X.shape[1]), importances[indices])
plt.xticks(range(X.shape[1]), indices)
plt.show()
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectFromModel
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn import datasets

iris = datasets.load_iris()
X = iris.data
y = iris.target

# Two-stage pipeline: L1-penalised LinearSVC picks the features, then a
# random forest is trained on the selected columns only.
clf = Pipeline([
    # penalty="l1" is only supported with dual=False (the primal problem);
    # with the default squared-hinge loss and dual=True, fit() raises
    # ValueError. This matches the setting used in linearsvc.py.
    ('feature_selection', SelectFromModel(LinearSVC(penalty="l1", dual=False))),
    ('classification', RandomForestClassifier())
])
clf.fit(X, y)
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectKBest, SelectPercentile
from sklearn.feature_selection import f_classif

# Iris samples and their class labels.
iris = load_iris()
X = iris.data
y = iris.target

# Univariate selector: score every feature with f_classif and keep the
# highest-scoring 90 percent of the features (percentile counts features,
# not "information").
sp = SelectPercentile(score_func=f_classif, percentile=90)

# Fit on the labelled data, then reduce X to the retained columns.
sp.fit(X, y)
X_result = sp.transform(X)
print(X_result)

# Boolean mask over the original columns: True where the feature was kept.
features = sp.get_support()
print(features)
class Regressor:
    """LightGBM regression workflow: load data, split, fit, predict, evaluate, plot.

    The methods are meant to be called in the order used by this module's
    ``__main__`` guard: ``model_datum`` -> ``model_train`` ->
    ``model_predict`` -> ``model_evaluate`` -> ``plot_predict``. Each step
    stores its result on the instance for the next one.
    """

    def __init__(self, step=10, feature_num=6):
        """Load the feature matrix ``X`` and target ``y`` from ``utils.gen_model_datum``.

        :param step: forwarded to ``gen_model_datum`` (presumably the length
            of the look-back window -- confirm in utils.py).
        :param feature_num: forwarded to ``gen_model_datum``.
        """
        self.step = step
        self.feature_num = feature_num
        # Forward the caller's feature_num; previously a literal 6 was passed
        # here, so the constructor argument was silently ignored.
        self.X, self.y = gen_model_datum(step=step, feature_num=feature_num)

    def model_datum(self):
        """Split ``X``/``y`` into an 80/20 train/test partition with a fixed seed."""
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.X, self.y, test_size=0.2, random_state=123)

    def model_train(self):
        """Fit a gradient-boosted regressor on the training partition.

        Requires ``model_datum`` to have been called first.
        """
        self.model = LGBMRegressor(boosting_type='gbdt', objective='regression', num_leaves=25,
                                   learning_rate=0.2, n_estimators=70, max_depth=15,
                                   metric='rmse', bagging_fraction=0.8, feature_fraction=0.8,
                                   reg_lambda=0.9)
        self.model.fit(self.X_train, self.y_train)

    def model_predict(self):
        """Predict the test partition and store the result in ``self.y_pred``."""
        # NOTE(review): model_train configures no early stopping, so
        # best_iteration_ is expected to be None (i.e. use every tree) --
        # confirm against the LightGBM version in use.
        self.y_pred = self.model.predict(self.X_test, num_iteration=self.model.best_iteration_)

    def model_evaluate(self):
        """Print the root-mean-squared error of ``y_pred`` against ``y_test``."""
        print('预测结果的rmse是:')
        print(mean_squared_error(self.y_test, self.y_pred) ** 0.5)

    def plot_predict(self):
        """Save and show two figures comparing predictions with actual values.

        ``predict.png`` is a scatter plot of actual vs. predicted values;
        ``predict2.png`` overlays both series for rows 10..99.
        """
        results = pd.DataFrame()
        results['预测值'] = list(self.y_pred)
        results['实际值'] = list(self.y_test)

        plt.scatter(results['实际值'], results['预测值'], c='r')
        plt.xlabel("实际价格")  # x-axis label
        plt.ylabel("预测股价")  # y-axis label
        plt.savefig("predict.png")
        plt.show()

        plt.plot(results['预测值'][10:100])
        plt.plot(results['实际值'][10:100])
        plt.savefig("predict2.png")
        plt.show()

    # @profile
    # NOTE(review): the decorator above is for line_profiler/kernprof (see
    # readme.txt). The module-level ``import profile`` binds that name to the
    # stdlib module, which would shadow kernprof's injected decorator --
    # confirm before uncommenting.
    def model_param_search(self):
        """Grid-search ``num_leaves``, ``n_estimators`` and ``learning_rate``.

        Runs a 5-fold ``GridSearchCV`` on the training partition (requires
        ``model_datum`` first), prints the best parameter combination and
        returns it.

        :return: the ``best_params_`` dict of the fitted grid search.
        """
        # Base estimator settings; the three keys listed in the grid below
        # are overridden for every candidate.
        parameters = {"boosting_type": 'gbdt', "objective": 'regression', "num_leaves": 1200,
                      "learning_rate": 0.1, "n_estimators": 200, "max_depth": 15,
                      "metric": 'rmse', "bagging_fraction": 0.8, "feature_fraction": 0.8,
                      "reg_lambda": 0.9}
        model = LGBMRegressor(**parameters)
        parameters_s = {'num_leaves': range(5, 30, 5), 'n_estimators': range(10, 200, 30),
                        'learning_rate': [0.01, 0.1, 0.2]}
        grid_search = GridSearchCV(model, param_grid=parameters_s, cv=5)
        grid_search.fit(self.X_train, np.array(self.y_train))
        print(grid_search.best_params_)
        return grid_search.best_params_
import pstats

# Load the saved profiler dump and order its entries by cumulative time.
# sort_stats() returns the same Stats object, so the call can be chained.
p_stats = pstats.Stats('profile.stats').sort_stats("cumulative")

# Report 1: per-function timing table.
p_stats.print_stats()

# Report 2: for every function, the functions that called it.
p_stats.print_callers()

# Report 3: for every function, the functions it called.
p_stats.print_callees()
21 | -s: 表示导入要执行的代码所属的 module, 后面就是通过 module_name.func_name() 表示要测试执行的代码了 22 | 23 | 三、cProfile 模块 24 | """ 25 | cProfile 是标准库内建的分析工具,可以用来测量每一个函数的执行时间。其基本的使用命令如下: 26 | python -m cProfile -s cumulative cp02/demo01.py 27 | -m 表示执行 cProfile 模块, -s cumulative 表示对每个函数累计花费时间进行排序,可以让我们很直观的看到哪一部分的代码执行的最慢。 28 | 其结果如下: 29 | 30 | 更好用的方式是生成一个统计文件,然后通过 pstats 模块进行分析,命令如下: 31 | python -m cProfile -o profile.stats cp02/demo01.py 32 | 这样将统计结果存储到 profile.stats 文件之后就可以通过 pstats 模块来查看。 33 | 34 | 其各项的含义如下: 35 | ncalls: 函数执行次数 36 | tottime: 函数自身的总耗时(不包括调用子函数的时间) 37 | percall: 每次调用的平均耗时(tottime / ncalls) 38 | cumtime: 包括子函数在内的累计耗时 39 | percall: 每次调用的平均累计耗时(cumtime / 原始调用次数) 40 | filename:lineno(function): 文件名+代码行数+方法名 41 | 通过 cProfile 可以快速的定位出现性能瓶颈的函数,然后再针对函数进行进一步的分析。 42 | """ 43 | 44 | 四、line_profiler – 逐行代码分析工具 45 | """ 46 | line_profiler 可以对函数进行逐行分析,是调查 Python 的 CPU 密集型问题最强大的工具。通常的使用步骤是先用 cProfile 进行函数分析, 47 | 然后再对有性能瓶颈的函数进行逐行分析。 48 | 49 | 1.安装 line_profiler 50 | pip install line_profiler 51 | 52 | 2. 使用 line_profiler 53 | 首先在要分析的函数上添加装饰器 @profile 54 | 然后使用 kernprof 命令执行对应的 Python 代码,如下: 55 | kernprof -l cp02/demo01.py 56 | -l 参数表示逐行分析,另外还可以有 -v 参数用来显示输出,不加的话会生成一个 .lprof 的输出文件。得到文件后可以用下面命令查看: 57 | 58 | python -m line_profiler demo01.py.lprof 59 | 其各项含义为: 60 | Line: 代码行数 61 | Hits: 执行次数 62 | Time: 占用的总时间 63 | Per Hit: 每次执行的时间 64 | % Time: 时间占比 65 | Line Contents: 代码内容 66 | """ 67 | 68 | 五、memory_profiler – 诊断内存的用量 69 | """ 70 | memory_profiler 模块能够逐行测量内存的占用情况 71 | 1. 安装 72 | pip install psutil # 需要先安装 psutil 模块 73 | pip install memory_profiler 74 | 2. 
使用 75 | 命令如下: 76 | python -m memory_profiler cp02/demo01.py 77 | 各项含义为 78 | Mem Usage: 内存占用情况 79 | Increment: 执行该行代码后新增的内存 80 | 另外 memory_profiler 提供了一个 mprof 进行可视化的内存展示,使用该命令需要安装 matplotlib 81 | pip install matplotlib 82 | 执行如下命令: 83 | mprof run cp02/demo01.py # 生成统计文件 84 | mprof plot mprofile_20170904220625.dat # 展示统计文件 85 | """ -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | backcall==0.2.0 2 | click==8.0.3 3 | colorama==0.4.4 4 | cycler==0.11.0 5 | Cython==0.29.26 6 | dataclasses==0.8 7 | decorator==5.1.0 8 | Flask==2.0.2 9 | importlib-metadata==4.8.3 10 | ipython==7.16.2 11 | ipython-genutils==0.2.0 12 | itsdangerous==2.0.1 13 | jedi==0.17.2 14 | Jinja2==3.0.3 15 | joblib==1.1.0 16 | kiwisolver==1.3.1 17 | lightgbm==3.3.1 18 | line-profiler==3.4.0 19 | MarkupSafe==2.0.1 20 | matplotlib==3.3.4 21 | numpy==1.19.5 22 | pandas==1.1.5 23 | parso==0.7.1 24 | pickleshare==0.7.5 25 | Pillow==8.4.0 26 | prompt-toolkit==3.0.24 27 | Pygments==2.11.2 28 | pyparsing==3.0.6 29 | python-dateutil==2.8.2 30 | pytz==2021.3 31 | scikit-learn==0.24.2 32 | scipy==1.5.4 33 | seaborn==0.11.2 34 | six==1.16.0 35 | threadpoolctl==3.0.0 36 | traitlets==4.3.3 37 | typing-extensions==4.0.1 38 | wcwidth==0.2.5 39 | Werkzeug==2.0.2 40 | zipp==3.6.0 41 | -------------------------------------------------------------------------------- /server.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | path = os.path.dirname(sys.path[0]) 5 | if path and path not in sys.path: 6 | sys.path.append(path) 7 | 8 | from flask import Flask 9 | 10 | app = Flask("Product") 11 | 12 | 13 | @app.route("/") 14 | def welcome(): 15 | return "欢迎来到通达信数据分析的世界" 16 | 17 | 18 | if __name__ == '__main__': 19 | app.run() 20 | -------------------------------------------------------------------------------- /utils.py: 
"""Helpers that turn a TDX ``.day`` file into sliding-window model input."""
import os
import struct
from collections import deque
from itertools import chain

import pandas as pd

# Layout of one record as unpacked by read_tdx_day_file: eight 4-byte fields,
# 32 bytes in total.
_DAY_RECORD = struct.Struct('IIIIIfII')
_DAY_COLUMNS = ['code', 'tradeDate', 'open', 'high', 'low', 'close', 'amount', 'vol']


def _strip_extension(file_path):
    """Return *file_path* without its final extension.

    ``os.path.splitext`` only looks at the last path component, so dots in
    directory names (``./data/x.day``, ``v1.0/x.day``) do not truncate the
    result the way ``file_path.split(".")[0]`` would.
    """
    return os.path.splitext(file_path)[0]


def read_tdx_day_file(file_path):
    """Parse a ``.day`` file into a DataFrame with one row per record.

    Every 32-byte record is unpacked with the format ``'IIIIIfII'``.  The
    second to fifth fields (``open``, ``high``, ``low``, ``close``) are divided
    by 100 and the eighth field is discarded.  ``code`` is the file's base
    name with ``'.day'`` removed and is repeated on every row.

    Args:
        file_path: path of the ``.day`` file to read.

    Returns:
        DataFrame with the columns ``code, tradeDate, open, high, low, close,
        amount, vol``; it has no rows when the file is empty.

    Raises:
        struct.error: if the file size is not a multiple of 32 bytes.
    """
    code = os.path.basename(file_path).replace('.day', '')
    with open(file_path, 'rb') as fl:
        buffer = fl.read()

    rows = [
        [code, trade_date, open_ / 100, high / 100, low / 100, close / 100, amount, vol]
        for trade_date, open_, high, low, close, amount, vol, _unused
        in _DAY_RECORD.iter_unpack(buffer)
    ]
    return pd.DataFrame(data=rows, columns=_DAY_COLUMNS)


def gen_model_datum_from_file(step, feature_num, file_path):
    """Build sliding-window samples from a ``.day`` file and save them as CSV.

    Each sample is laid out as ``[code, <features of the `step` consecutive
    rows ending at row i, oldest first>, <close of row i + 1>]``.  The last
    record of the file has no following close and is therefore only used as a
    target, never as the end of a window.

    The result is also written to ``<file_path without extension>_<step>.csv``.

    Args:
        step: number of consecutive rows per sample (expected to be >= 1).
        feature_num: unused; kept so existing callers keep working.
        file_path: path of the ``.day`` file to read.

    Returns:
        DataFrame whose columns are produced by ``gen_columns_name``; it is
        empty when the file holds fewer than ``step + 1`` records.
    """
    df = read_tdx_day_file(file_path)
    df["nextClose"] = df["close"].shift(periods=-1, fill_value=0)
    df = df[:-1]  # the final row only has the fill value as its target

    feature_cols = df.columns[1:-1]
    rows = zip(
        df["code"],
        # itertuples keeps each column's own type (ints stay ints), which a
        # plain ``.values`` on mixed int/float columns would not.
        df[feature_cols].itertuples(index=False, name=None),
        df["nextClose"],
    )

    window = deque(maxlen=step)  # automatically drops the oldest row
    samples = []
    for code, features, target in rows:
        window.append(features)
        if len(window) == step:
            samples.append([code, *chain.from_iterable(window), target])

    result = pd.DataFrame(samples, columns=gen_columns_name(df.columns, step))
    result.to_csv(f"{_strip_extension(file_path)}_{step}.csv", index=False)
    return result


def gen_model_datum(step, feature_num, file_path="data/sh000001.day"):
    """Return ``(X, y)`` built from the sliding-window samples of a file.

    ``X`` is the sample frame without the ``code`` column and without every
    ``tradeDate_*`` column; ``y`` is the ``predict`` column.

    Args:
        step: number of consecutive rows per sample.
        feature_num: unused here; forwarded to ``gen_model_datum_from_file``.
        file_path: path of the ``.day`` file to read.

    Returns:
        Tuple ``(X, y)`` of a DataFrame and a Series.
    """
    df = gen_model_datum_from_file(step, feature_num, file_path)
    columns_drop = [name for name in df.columns.tolist()[1:] if "tradeDate" in name]
    columns_drop.append("code")

    # NOTE(review): X still contains the "predict" column, i.e. the target
    # itself.  Kept as-is because callers outside this file may rely on the
    # current column set -- confirm whether it should be dropped as well.
    X = df.drop(columns=columns_drop)
    y = df['predict']
    return X, y


def plot_y(y, file, file_path):
    """Plot *y*, save the figure next to *file_path* as ``.png`` and show it.

    Args:
        y: values handed to ``matplotlib.pyplot.plot``.
        file: unused; kept so existing callers keep working.
        file_path: the figure is saved to this path with its extension
            replaced by ``.png``.
    """
    # Imported here so that the data helpers above stay importable where
    # matplotlib (or a display backend) is not available.
    import matplotlib.pyplot as plt

    plt.plot(y)
    plt.savefig(_strip_extension(file_path) + ".png")
    plt.show()


def gen_columns_name(columns, step):
    """Return the column names of the sliding-window sample frame.

    The first entry of *columns* is kept, every entry between the first and
    the last is repeated once per window position as ``<name>_<position>``
    (positions ``0 .. step - 1``), and the last entry is replaced by
    ``"predict"``.

    Args:
        columns: sequence of source column names.
        step: number of window positions.

    Returns:
        List of column names.
    """
    feature_names = columns[1:-1]
    names = [f"{name}_{position}" for position in range(step) for name in feature_names]
    return [columns[0], *names, "predict"]


if __name__ == '__main__':
    X, y = gen_model_datum(step=3, feature_num=20)
    print(X)
    print(y)