├── README.md ├── baseline.ipynb ├── 第一、二问集成学习算法.ipynb ├── 第一问信用评级的描述性统计.ipynb ├── 第一问所有特征.csv ├── 第一问风险违约率求解.ipynb ├── 第一问:作废发票计算.ipynb └── 论文.pdf /README.md: -------------------------------------------------------------------------------- 1 | # 2020-CMCM-C 2 | 2020年数学建模国赛C题国一优秀论文——编号C227:银行对中小微企业的信贷决策分析 3 | 4 | # 比赛的代码 5 | 这个项目包含了C227这篇论文的代码和特征工程之后的数据集,特征工程的全部代码我找不到了,所以我放上了第一问的csv文件(仓库根目录下的"第一问所有特征.csv")。 6 | 论文中有比赛期间我们的全部代码,***但是我不确定有没有问题hhh*** 7 | 模型训练部分的代码其实有些问题,比如我们用了全部数据集做评估,而不是测试集。 8 | 9 | # 更新后的代码(baseline.ipynb) 10 | baseline中的代码是更改之后的代码,我尝试使用了SMOTETomek(一种过采样+欠采样)的方法, 11 | Stacking融合之后的AUC还是可以达到***0.98***的样子的,F1在0.88左右,基分类器选的是lgb,rf和gbdt,其实效果不算很好, 12 | 但对于银行信贷来说,recall是很重要的,所幸stacking后的recall是1 13 | 14 | 我也尝试用了lgb单模型,可以看见在非平衡数据上lgb还是非常给力的。 15 | 16 | 希望这些代码能给你提供帮助 17 | -------------------------------------------------------------------------------- /baseline.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 60, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "import matplotlib.pyplot as plt\n", 12 | "import seaborn as sns\n", 13 | "from collections import Counter\n", 14 | "from imblearn.combine import SMOTETomek # 过采样+欠采样结合的方法处理非平衡数据集\n", 15 | "\n", 16 | "from sklearn.linear_model import LogisticRegression\n", 17 | "from sklearn.metrics import precision_score,recall_score\n", 18 | "from sklearn.model_selection import train_test_split\n", 19 | "from sklearn.preprocessing import StandardScaler\n", 20 | "from sklearn.model_selection import GridSearchCV\n", 21 | "from sklearn import metrics\n", 22 | "from sklearn.ensemble import AdaBoostClassifier as ada\n", 23 | "from sklearn.ensemble import GradientBoostingClassifier\n", 24 | "from sklearn.svm import SVC\n", 25 | "from sklearn.ensemble import RandomForestClassifier as RF\n", 26 | "from sklearn.model_selection import 
cross_val_score\n", 27 | "from sklearn.metrics import roc_auc_score\n", 28 | "from sklearn.ensemble import VotingClassifier\n", 29 | "from mlxtend.classifier import StackingClassifier\n", 30 | "from mlxtend.classifier import StackingCVClassifier\n", 31 | "\n", 32 | "# Voting \n", 33 | "data = pd.read_csv('第一问所有特征.csv',encoding='utf-8',index_col='企业代号')\n", 34 | "for i in range(len(data)):\n", 35 | " a='E'+str(i+1)\n", 36 | " if data.loc[a,'是否违约']=='否':\n", 37 | " data.loc[a,'违约']=0\n", 38 | " else :\n", 39 | " data.loc[a,'违约']=1\n", 40 | "\n", 41 | "x = data.iloc[:,:-4].values\n", 42 | "y = data.iloc[:,-1].values" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 61, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "x_train,x_test,y_train,y_test=train_test_split(x,y,random_state=30)\n", 52 | "smote_tomek = SMOTETomek(random_state=0)\n", 53 | "x_train, y_train = smote_tomek.fit_resample(x_train, y_train)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 62, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "from lightgbm import LGBMClassifier\n", 63 | "\n", 64 | "lgb = LGBMClassifier(learning_rate=0.05,n_estimators=100,objective='binary',\n", 65 | " boosting_type='gbdt',\n", 66 | " num_leaves=2**5,\n", 67 | " max_depth=5,reg_alpha=0.5,reg_lambda=0.5,\n", 68 | " metric='auc',subsample=0.75)\n", 69 | "\n", 70 | "LR = LogisticRegression(C=0.1, class_weight=None, dual=False, fit_intercept=True,\n", 71 | " intercept_scaling=1, l1_ratio=None, max_iter=100,\n", 72 | " multi_class='auto', n_jobs=None, penalty='l2',\n", 73 | " random_state=None, solver='newton-cg', tol=0.0001, verbose=0,\n", 74 | " warm_start=False)\n", 75 | "Ada = ada(algorithm='SAMME', base_estimator=None, learning_rate=0.1,\n", 76 | " n_estimators=100, random_state=30)\n", 77 | "GBDT = GradientBoostingClassifier(ccp_alpha=0.0, learning_rate=0.7, max_depth=3)\n", 78 | "svc = SVC(C=0.8, break_ties=False, cache_size=200, 
class_weight=None, coef0=0.0,\n", 79 | " decision_function_shape='ovr', degree=3, gamma=20, kernel='rbf',\n", 80 | " max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,\n", 81 | " verbose=False)\n", 82 | "\n", 83 | "rf = RF(bootstrap=True, ccp_alpha=0.0, class_weight=None,\n", 84 | " criterion='gini', max_depth=None, max_features='auto',\n", 85 | " max_leaf_nodes=None, max_samples=None,\n", 86 | " min_impurity_decrease=0.0,min_samples_leaf=1, min_samples_split=2,\n", 87 | " min_weight_fraction_leaf=0.0, n_estimators=100,\n", 88 | " n_jobs=None, oob_score=False, random_state=30, verbose=0,\n", 89 | " warm_start=False)\n", 90 | "\n", 91 | "\n", 92 | "sclf = StackingCVClassifier(classifiers=[Ada, GBDT, LR,rf],\n", 93 | " use_probas=True,\n", 94 | " meta_classifier=svc,\n", 95 | " random_state=30)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 63, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "tranfer = StandardScaler()\n", 105 | "x_train = tranfer.fit_transform(x_train) # fit on the training split only so test-set statistics cannot leak into the scaler\n", 106 | "x_test = tranfer.transform(x_test) # reuse the mean/std learned from the training split\n", 107 | "x = tranfer.transform(x) # keep the full matrix on the same scale because a later cell re-splits x" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 64, 113 | "metadata": {}, 114 | "outputs": [ 115 | { 116 | "data": { 117 | "text/plain": [ 118 | "AdaBoostClassifier(algorithm='SAMME', learning_rate=0.1, n_estimators=100,\n", 119 | " random_state=30)" 120 | ] 121 | }, 122 | "execution_count": 64, 123 | "metadata": {}, 124 | "output_type": "execute_result" 125 | } 126 | ], 127 | "source": [ 128 | "Ada" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 65, 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "data": { 138 | "text/html": [ 139 | "
\n", 140 | "\n", 153 | "\n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | "
train_Accuracytest_AccuracyPrecisionRecallAUCf1
LR0.8671880.8387100.4000000.500.7592590.444444
Ada0.9531250.7419350.3333331.000.8472220.500000
GBDT1.0000000.8064520.3750000.750.9074070.500000
svc1.0000000.9032261.0000000.250.5000000.400000
rf1.0000000.8387100.4444441.000.9537040.615385
LightGBM0.9843750.7741940.3333330.750.9166670.461538
StackingClassifier0.9843750.9677420.8000001.000.9814810.888889
\n", 231 | "
" 232 | ], 233 | "text/plain": [ 234 | " train_Accuracy test_Accuracy Precision Recall \\\n", 235 | "LR 0.867188 0.838710 0.400000 0.50 \n", 236 | "Ada 0.953125 0.741935 0.333333 1.00 \n", 237 | "GBDT 1.000000 0.806452 0.375000 0.75 \n", 238 | "svc 1.000000 0.903226 1.000000 0.25 \n", 239 | "rf 1.000000 0.838710 0.444444 1.00 \n", 240 | "LightGBM 0.984375 0.774194 0.333333 0.75 \n", 241 | "StackingClassifier 0.984375 0.967742 0.800000 1.00 \n", 242 | "\n", 243 | " AUC f1 \n", 244 | "LR 0.759259 0.444444 \n", 245 | "Ada 0.847222 0.500000 \n", 246 | "GBDT 0.907407 0.500000 \n", 247 | "svc 0.500000 0.400000 \n", 248 | "rf 0.953704 0.615385 \n", 249 | "LightGBM 0.916667 0.461538 \n", 250 | "StackingClassifier 0.981481 0.888889 " 251 | ] 252 | }, 253 | "execution_count": 65, 254 | "metadata": {}, 255 | "output_type": "execute_result" 256 | } 257 | ], 258 | "source": [ 259 | "sclf = StackingCVClassifier(classifiers=[lgb, GBDT,rf],\n", 260 | " use_probas=True,\n", 261 | " meta_classifier=svc,\n", 262 | " random_state=30)\n", 263 | "\n", 264 | "zhibiao = {}\n", 265 | "\n", 266 | "# weight = []\n", 267 | "for clf, label in zip([LR, Ada, GBDT, svc, rf, lgb,sclf],\n", 268 | " ['LR',\n", 269 | " 'Ada',\n", 270 | " 'GBDT',\n", 271 | " 'svc',\n", 272 | " 'rf', 'LightGBM','StackingClassifier']):\n", 273 | " clf.fit(x_train, y_train)\n", 274 | " y_predict = clf.predict(x_test)\n", 275 | "# print('{}在预测集模型的准确率为:\\n'.format(label), metrics.accuracy_score(y_test, y_predict))\n", 276 | "# print('{}在训练集模型的准确率为:\\n'.format(label), metrics.accuracy_score(y_train, clf.predict(x_train)))\n", 277 | "# print('{}的综合准确率为:\\n'.format(label), metrics.accuracy_score(y, clf.predict(x)))\n", 278 | "# print('{}的Precision为:'.format(label), precision_score(y_test, y_predict))\n", 279 | "# print('{}的Recall为:'.format(label), recall_score(y_test, y_predict))\n", 280 | " tem = metrics.roc_auc_score(y_test, y_predict)\n", 281 | "# weight.append(tem)\n", 282 | "# print('{}的ROC面积为:'.format(label), 
metrics.roc_auc_score(y_test, clf.predict_proba(x_test)[:,1]))\n", 283 | "# print('{}的f1值为:'.format(label), metrics.f1_score(y_test, y_predict))\n", 284 | "# print()\n", 285 | " \n", 286 | " tem_1 = [metrics.accuracy_score(y_train, clf.predict(x_train)),metrics.accuracy_score(y_test, y_predict),\n", 287 | " precision_score(y_test, y_predict),recall_score(y_test, y_predict),\n", 288 | " metrics.roc_auc_score(y_test, clf.predict_proba(x_test)[:,1]),metrics.f1_score(y_test, y_predict)]\n", 289 | " zhibiao[label]=tem_1\n", 290 | "data2 = pd.DataFrame(data=zhibiao,index=['train_Accuracy','test_Accuracy',\n", 291 | " 'Precision','Recall','AUC','f1']).T\n", 292 | "data2" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": 57, 298 | "metadata": {}, 299 | "outputs": [ 300 | { 301 | "data": { 302 | "text/html": [ 303 | "
\n", 304 | "\n", 317 | "\n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | "
train_Accuracytest_AccuracyPrecisionRecallAUCf1
LightGBM0.9565220.9677420.81.00.8055560.888889
\n", 341 | "
" 342 | ], 343 | "text/plain": [ 344 | " train_Accuracy test_Accuracy Precision Recall AUC f1\n", 345 | "LightGBM 0.956522 0.967742 0.8 1.0 0.805556 0.888889" 346 | ] 347 | }, 348 | "execution_count": 57, 349 | "metadata": {}, 350 | "output_type": "execute_result" 351 | } 352 | ], 353 | "source": [ 354 | "# Single LightGBM model (is_unbalance is set for the imbalanced dataset)\n", 355 | "x_train,x_test,y_train,y_test=train_test_split(x,y,random_state=30)\n", 356 | "clf = LGBMClassifier(learning_rate=0.05,n_estimators=100,objective='binary',\n", 357 | " boosting_type='gbdt',\n", 358 | " num_leaves=2**5,\n", 359 | " max_depth=5,reg_alpha=0.5,reg_lambda=0.5,is_unbalance=True,\n", 360 | " metric='auc',subsample=0.75)\n", 361 | "y_predict = clf.fit(x_train,y_train).predict(x_test) # score this model's own predictions; y_predict was otherwise left over from the model-comparison loop (last fitted: the stacking classifier)\n", 362 | "pd.DataFrame(data=[metrics.accuracy_score(y_train, clf.predict(x_train)),metrics.accuracy_score(y_test, y_predict),\n", 363 | " precision_score(y_test, y_predict),recall_score(y_test, y_predict),\n", 364 | " metrics.roc_auc_score(y_test, clf.predict_proba(x_test)[:,1]),metrics.f1_score(y_test, y_predict)],index=['train_Accuracy','test_Accuracy',\n", 365 | " 'Precision','Recall','AUC','f1'],columns=['LightGBM']).T" 366 | ] 367 | } 368 | ], 369 | "metadata": { 370 | "interpreter": { 371 | "hash": "07efdcd4b820c98a756949507a4d29d7862823915ec7477944641bea022f4f62" 372 | }, 373 | "kernelspec": { 374 | "display_name": "Python 3.8.8 ('base')", 375 | "language": "python", 376 | "name": "python3" 377 | }, 378 | "language_info": { 379 | "codemirror_mode": { 380 | "name": "ipython", 381 | "version": 3 382 | }, 383 | "file_extension": ".py", 384 | "mimetype": "text/x-python", 385 | "name": "python", 386 | "nbconvert_exporter": "python", 387 | "pygments_lexer": "ipython3", 388 | "version": "3.8.8" 389 | }, 390 | "orig_nbformat": 4 391 | }, 392 | "nbformat": 4, 393 | "nbformat_minor": 2 394 | } 395 | -------------------------------------------------------------------------------- /第一问信用评级的描述性统计.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 3, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "data": { 20 | "text/html": [ 21 | "
\n", 22 | "\n", 35 | "\n", 36 | " \n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | "
违约概率信誉评级
00.144146A
10.109683A
20.087636C
30.100442C
40.132734B
.........
1180.690629D
1190.863239D
1200.669896D
1210.813119D
1220.858223D
\n", 101 | "

123 rows × 2 columns

\n", 102 | "
" 103 | ], 104 | "text/plain": [ 105 | " 违约概率 信誉评级\n", 106 | "0 0.144146 A\n", 107 | "1 0.109683 A\n", 108 | "2 0.087636 C\n", 109 | "3 0.100442 C\n", 110 | "4 0.132734 B\n", 111 | ".. ... ...\n", 112 | "118 0.690629 D\n", 113 | "119 0.863239 D\n", 114 | "120 0.669896 D\n", 115 | "121 0.813119 D\n", 116 | "122 0.858223 D\n", 117 | "\n", 118 | "[123 rows x 2 columns]" 119 | ] 120 | }, 121 | "execution_count": 3, 122 | "metadata": {}, 123 | "output_type": "execute_result" 124 | } 125 | ], 126 | "source": [ 127 | "data = pd.read_csv('违约风险.csv',encoding='gbk')\n", 128 | "data" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 17, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "A_max = data[data['信誉评级']=='A']['违约概率'].max()\n", 138 | "A_min = data[data['信誉评级']=='A']['违约概率'].min()\n", 139 | "\n", 140 | "B_max = data[data['信誉评级']=='B']['违约概率'].max()\n", 141 | "B_min = data[data['信誉评级']=='B']['违约概率'].min()\n", 142 | "\n", 143 | "C_max = data[data['信誉评级']=='C']['违约概率'].max()\n", 144 | "C_min = data[data['信誉评级']=='C']['违约概率'].min()\n", 145 | "\n", 146 | "D_max = data[data['信誉评级']=='D']['违约概率'].max()\n", 147 | "D_min = data[data['信誉评级']=='D']['违约概率'].min()" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 18, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "data": { 157 | "text/html": [ 158 | "
\n", 159 | "\n", 172 | "\n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | "
ABCD
Max0.4347120.7563490.6382700.882178
Min0.0787570.1009520.0876360.237270
\n", 199 | "
" 200 | ], 201 | "text/plain": [ 202 | " A B C D\n", 203 | "Max 0.434712 0.756349 0.638270 0.882178\n", 204 | "Min 0.078757 0.100952 0.087636 0.237270" 205 | ] 206 | }, 207 | "execution_count": 18, 208 | "metadata": {}, 209 | "output_type": "execute_result" 210 | } 211 | ], 212 | "source": [ 213 | "dd = {'A':[A_max,A_min],\n", 214 | " 'B':[B_max,B_min],\n", 215 | " 'C':[C_max,C_min],\n", 216 | " 'D':[D_max,D_min]}\n", 217 | "\n", 218 | "d1 = pd.DataFrame(data=dd,index=['Max','Min'])\n", 219 | "d1" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 27, 225 | "metadata": {}, 226 | "outputs": [ 227 | { 228 | "data": { 229 | "text/html": [ 230 | "
\n", 231 | "\n", 244 | "\n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | "
ABCD
count27.00000038.00000034.00000024.000000
mean0.1450620.1816960.2085710.709669
std0.0677850.1078090.1466120.169678
min0.0787570.1009520.0876360.237270
25%0.1102930.1373160.1211820.651753
50%0.1286960.1611500.1566890.780061
75%0.1623110.1913590.1986890.814742
max0.4347120.7563490.6382700.882178
\n", 313 | "
" 314 | ], 315 | "text/plain": [ 316 | " A B C D\n", 317 | "count 27.000000 38.000000 34.000000 24.000000\n", 318 | "mean 0.145062 0.181696 0.208571 0.709669\n", 319 | "std 0.067785 0.107809 0.146612 0.169678\n", 320 | "min 0.078757 0.100952 0.087636 0.237270\n", 321 | "25% 0.110293 0.137316 0.121182 0.651753\n", 322 | "50% 0.128696 0.161150 0.156689 0.780061\n", 323 | "75% 0.162311 0.191359 0.198689 0.814742\n", 324 | "max 0.434712 0.756349 0.638270 0.882178" 325 | ] 326 | }, 327 | "execution_count": 27, 328 | "metadata": {}, 329 | "output_type": "execute_result" 330 | } 331 | ], 332 | "source": [ 333 | "A_des = data[data['信誉评级']=='A']['违约概率'].describe()\n", 334 | "B_des = data[data['信誉评级']=='B']['违约概率'].describe()\n", 335 | "C_des = data[data['信誉评级']=='C']['违约概率'].describe()\n", 336 | "D_des = data[data['信誉评级']=='D']['违约概率'].describe()\n", 337 | "\n", 338 | "dd2 = {'A':A_des,'B':B_des,'C':C_des,'D':D_des}\n", 339 | "d2 = pd.DataFrame(data=dd2)\n", 340 | "d2" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": 29, 346 | "metadata": {}, 347 | "outputs": [], 348 | "source": [ 349 | "d2.to_csv('各类信誉评级的违约风险的描述性统计分析.csv',encoding='gbk')" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": null, 355 | "metadata": {}, 356 | "outputs": [], 357 | "source": [] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": null, 362 | "metadata": {}, 363 | "outputs": [], 364 | "source": [] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": null, 369 | "metadata": {}, 370 | "outputs": [], 371 | "source": [] 372 | } 373 | ], 374 | "metadata": { 375 | "kernelspec": { 376 | "display_name": "Python [conda env:root] *", 377 | "language": "python", 378 | "name": "conda-root-py" 379 | }, 380 | "language_info": { 381 | "codemirror_mode": { 382 | "name": "ipython", 383 | "version": 3 384 | }, 385 | "file_extension": ".py", 386 | "mimetype": "text/x-python", 387 | "name": "python", 388 | "nbconvert_exporter": 
"python", 389 | "pygments_lexer": "ipython3", 390 | "version": "3.7.6" 391 | }, 392 | "toc": { 393 | "base_numbering": 1, 394 | "nav_menu": {}, 395 | "number_sections": true, 396 | "sideBar": true, 397 | "skip_h1_title": false, 398 | "title_cell": "Table of Contents", 399 | "title_sidebar": "Contents", 400 | "toc_cell": false, 401 | "toc_position": {}, 402 | "toc_section_display": true, 403 | "toc_window_display": false 404 | } 405 | }, 406 | "nbformat": 4, 407 | "nbformat_minor": 4 408 | } 409 | -------------------------------------------------------------------------------- /第一问所有特征.csv: -------------------------------------------------------------------------------- 1 | 企业代号,销-进金额,增值税,销1,销2,销3,销4,进1,进2,进3,进4,进项发票的作废比例,销项发票的作废比例,绝对数变化,比例变化,是否扭亏为盈利,是否变为亏损,下属部门,分公司 ,公司,个体经营,评级,信誉评级,是否违约 2 | E1,-1898184082,0,179,51,51,71,332,74,27,3,0.055797733,0.027620222,-193872136,1.720613861,0,0,0,0,1,0,1,A,否 3 | E2,426297552.3,28261615.82,1147,268,121,43,2424,780,314,103,0.022421943,0.082002046,110302536.5,2.197674176,0,0,0,0,1,0,1,A,否 4 | E3,514987171,88562269.38,48,48,25,14,386,103,58,23,0.042534532,0.015993021,-115477286.1,0.516353348,0,0,0,1,0,0,3,C,否 5 | E4,1511803093,254330043.4,16,10,0,0,94,21,13,2,0.066308244,0.085163604,-1168509293,0.053642121,0,0,0,0,1,0,3,C,否 6 | E5,691527.38,548958.88,16,15,2,0,324,47,19,2,0.039188566,0.051886792,4068217.15,85.22708692,0,0,0,0,1,0,2,B,否 7 | E6,45673584.07,9086998.62,85,38,18,1,1170,235,96,13,0.046132134,0.132129278,14520912.32,6.220033757,0,0,0,0,1,0,1,A,否 8 | E7,450248079.4,79329020.55,1004,101,103,3,1043,392,126,10,0.034664427,0.01435759,-170352223.8,0.282999939,0,0,0,1,0,0,1,A,否 9 | E8,186366411.1,13359969.4,2051,428,169,31,1758,547,216,72,0.033679446,0.113103448,10175963.7,1.190343495,0,0,0,0,1,0,1,A,否 10 | E9,294700968.1,48008699.68,26,28,24,45,497,138,72,26,0.02189611,0.024720623,8701373.76,1.102641628,0,0,0,1,0,0,1,A,否 11 | 
E10,334674157.2,10308763.71,14,2,7,3,707,122,51,0,0.057847205,0.091549296,82858784.61,5.695238829,0,0,0,0,1,0,2,B,否 12 | E11,5011729.91,2085745.11,1,1,2,0,353,96,13,3,0.042665726,0.05908684,-38346246.26,0.062246969,0,0,0,0,1,0,3,C,否 13 | E12,119554241,12226064.87,1,3,1,0,298,77,16,2,0.021714922,0.077192982,-84557485.89,0.314231061,0,0,0,0,1,0,2,B,否 14 | E13,114691249.5,19162604.52,2699,342,111,25,456,150,73,43,0.015888336,0.149531543,17290905.08,1.624659042,0,0,0,0,1,0,1,A,否 15 | E14,86038310.36,17351210.64,499,176,145,0,609,170,126,0,0.077716547,0.071642686,8017782.6,1.260860968,0,0,0,0,0,1,3,C,否 16 | E15,207585880.2,7211027.52,0,0,1,1,27,5,1,0,0,0.063018242,28666792.4,2.030952353,0,0,0,0,1,0,1,A,否 17 | E16,209132769.2,4130668.31,34,9,8,2,63,11,3,0,0.003448276,0.111617312,39872630.5,4.427526479,0,0,0,0,1,0,1,A,否 18 | E17,12371632.79,0,136,37,6,3,633,165,67,4,0.056157505,0.170118343,10825143.39,4.609111121,0,0,0,0,1,0,1,A,否 19 | E18,50065149.39,0,32,5,8,1,507,94,38,2,0.053772767,0.094488189,17393819.63,25.37000438,0,0,0,0,1,0,1,A,否 20 | E19,-10663999.95,0,359,105,32,12,112,35,15,5,0.026246719,0.089706357,457386.04,1.295208096,0,0,0,0,1,0,1,A,否 21 | E20,-2938233.73,871931.22,20,1,3,1,51,10,8,1,0.078761062,0.051480051,4408596.54,3.448482477,0,0,0,0,1,0,2,B,否 22 | E21,-11541727.45,2010367.05,312,13,0,0,398,67,13,2,0.071799308,0.025920873,9020452.64,-107.8554226,1,0,0,0,1,0,2,B,否 23 | E22,39408223.26,1007767.46,29,12,3,1,54,17,8,1,0.055900621,0.125480154,34495523.8,-68.36411556,1,0,0,0,1,0,1,A,否 24 | E23,7810606.37,1343556.27,58,27,10,23,463,150,56,13,0.03009493,0.022969188,-6130097.84,-3.152684243,0,1,0,0,1,0,2,B,否 25 | E24,63461390.07,2581218.68,25,17,4,2,272,73,18,4,0.021006351,0.080808081,21065382.97,3.750814974,0,0,0,0,1,0,1,A,否 26 | E25,28738558.94,4790430.02,182,37,13,1,267,76,20,4,0.013475177,0.167689162,3950781.03,1.871990389,0,0,0,0,1,0,3,C,否 27 | E26,-1938217.2,0,29,12,1,0,80,23,9,5,0.041719343,0.107142857,801968.45,-1.535171354,1,0,0,0,1,0,1,A,否 28 | 
E27,-2964169.13,0,118,36,13,10,307,66,21,9,0.035087719,0.053814714,340587.89,-4.576462138,1,0,0,0,1,0,1,A,否 29 | E28,47653987.22,4585203.06,1,1,2,1,46,14,1,0,0.076923077,0.077328647,19643668.89,7.404078616,0,0,0,0,1,0,2,B,否 30 | E29,45010590.57,1778869.77,1,1,1,0,15,1,1,0,0.04,0.012631579,11138949.26,1.85630192,0,0,0,0,1,0,3,C,是 31 | E30,49372586.08,3403861.37,3,3,1,0,145,17,7,0,0.018567639,0.06625,2738199.31,1.234802735,0,0,0,0,1,0,2,B,否 32 | E31,42826768.62,6338508.75,64,28,37,22,87,10,5,5,0.019417476,0.041027607,7131326.48,1.679119834,0,0,0,0,1,0,1,A,否 33 | E32,41734977.49,1132413.07,9,0,1,0,207,49,16,0,0.005307856,0.152173913,11818843.08,2.063600841,0,0,0,0,1,0,2,B,否 34 | E33,-13685226.15,3473876.56,61,16,7,0,186,42,9,3,0.123743233,0.150046598,-1247606.58,0.512545099,0,0,0,0,1,0,2,B,否 35 | E34,30811458.94,1004529,56,6,2,0,379,56,12,0,0.103074924,0.157001414,-1442409.79,140.7276366,0,0,0,0,1,0,2,B,否 36 | E35,1307511.65,0,20,5,4,2,57,29,13,2,0.053811659,0.042857143,4954392.81,-11.28383944,1,0,0,0,1,0,2,B,否 37 | E36,14510309.69,1949388.05,87,60,51,0,319,62,36,0,0.045232274,0.076276665,5859723.15,3.214112585,0,0,0,0,1,0,4,D,是 38 | E37,-1828933.02,0,219,44,12,1,55,16,7,1,0.042071197,0.06991359,20100.51,0.945329365,0,0,0,0,1,0,2,B,否 39 | E38,33531866.64,617679.99,5,2,1,0,191,40,5,0,0.033299697,0.121019108,3241096.45,1.331340181,0,0,0,0,1,0,2,B,否 40 | E39,28820097.95,3723255.27,1,0,1,0,44,9,1,0,0.010638298,0.106060606,13935775.47,7.45272558,0,0,0,0,1,0,3,C,否 41 | E40,29114217.69,546512.99,452,31,0,0,641,111,29,13,0.025412961,0.093997735,13206723.03,6.115626836,0,0,0,0,1,0,3,C,否 42 | E41,26983203.31,460999.03,5,3,3,2,369,86,23,8,0.031972455,0.087481146,12254350.03,4.989140651,0,0,0,0,1,0,3,C,否 43 | E42,26225027.98,0,23,5,0,0,6,1,0,0,0,0.100917431,12910420.96,12.13534949,0,0,0,0,1,0,1,A,否 44 | E43,14107614.55,711302.84,3,2,0,0,15,4,0,0,0.128205128,0.126530612,1390239.32,1.182075685,0,0,0,0,1,0,2,B,否 45 | 
E44,2524250.34,326537.44,13,12,3,2,137,52,20,2,0.020833333,0.07926078,899638.66,6.562504224,0,0,0,0,1,0,3,C,否 46 | E45,459652.75,301217.76,55,8,0,0,92,19,3,0,0.013333333,0.061538462,-697757.92,-5.818097757,0,1,0,0,0,1,2,B,是 47 | E46,6666931.75,305058.31,104,27,8,1,342,69,21,3,0.020440252,0.087268994,1283506.09,12.06990003,0,0,0,0,1,0,3,C,否 48 | E47,5743256.84,1001210.12,414,161,106,79,234,66,46,28,0.032358003,0.046048891,1285307.17,2.966145392,0,0,0,0,1,0,3,C,否 49 | E48,37027025.99,5451628.06,35,13,5,6,164,43,21,3,0.020242915,0.05511811,17195806.14,5.250937641,0,0,0,0,1,0,1,A,否 50 | E49,9628430.92,956642.35,244,4,0,0,551,130,28,4,0.026832642,0.101364522,-4707428.38,0.389280788,0,0,0,0,1,0,3,C,否 51 | E50,16419820.69,709422.73,4,1,0,0,54,11,1,0,0.055837563,0.105263158,6781297.75,2.710812996,0,0,0,0,1,0,3,C,否 52 | E51,7169019.19,532024.51,87,22,15,5,45,8,6,2,0.004415011,0.068156425,5553169.22,149.5400353,0,0,0,0,1,0,2,B,否 53 | E52,1785006.11,266081.32,8,2,1,0,108,20,3,0,0.037647059,0.245382586,1095999.81,2.928640925,0,0,0,0,1,0,4,D,是 54 | E53,1234243.06,0,33,6,4,0,48,13,1,0,0.049206349,0.127753304,244683.34,1.233460399,0,0,0,0,1,0,3,C,否 55 | E54,24868467.73,3137410.18,700,187,212,11,347,76,28,6,0.054136253,0.081690945,3650147.27,1.559499811,0,0,0,0,1,0,1,A,否 56 | E55,11859438.55,1377481.76,18,2,2,0,96,32,6,3,0.086269745,0.249152542,-3163800.21,0.461382188,0,0,0,1,0,0,3,C,否 57 | E56,-1408942.97,0,21,6,6,4,114,34,9,5,0.040799334,0.074747475,590087.43,0.608077187,0,0,0,0,1,0,3,C,否 58 | E57,2973362.72,671931.59,18,6,1,0,32,8,1,0,0.061538462,0.118721461,-2307020.6,-520.3782614,0,1,0,0,1,0,2,B,否 59 | E58,14939854.55,1044924.28,18,2,2,0,284,52,18,0,0.028846154,0.268041237,4623582.12,2.631909484,0,0,0,0,1,0,2,B,否 60 | E59,7399026.38,1165767.6,131,34,15,5,52,17,5,2,0.030965392,0.038157282,2076239.55,2.668089467,0,0,0,0,1,0,1,A,否 61 | E60,3153536.31,0,3,4,1,0,102,24,2,0,0.014869888,0.173489279,2762867.14,16.64818925,0,0,0,0,1,0,2,B,否 62 | 
E61,16040129.65,1704753.35,47,8,9,1,28,8,6,2,0.014150943,0.018574297,-1426691.21,0.75169374,0,0,0,0,1,0,2,B,否 63 | E62,8262640.47,497880.69,20,4,2,1,108,27,12,1,0.005660377,0.127853881,-1325522.5,0.640706858,0,0,0,1,0,0,2,B,否 64 | E63,10859983.19,3946576.94,29,11,2,0,237,34,19,2,0.029879212,0.071322437,-7595933.39,0.163752575,0,0,0,0,1,0,2,B,否 65 | E64,8798256.56,55237.46,131,20,10,1,17,2,2,0,0,0.066909091,412503.15,1.153526918,0,0,0,0,1,0,1,A,否 66 | E65,2726684.71,237765.7,36,14,4,4,21,12,5,0,0.01362862,0.06185567,745267.07,3.181304821,0,0,0,0,1,0,2,B,否 67 | E66,-375084.44,0,113,25,12,1,19,5,4,2,0.003466205,0.075110457,1573358.29,163.8030821,0,0,0,0,1,0,2,B,否 68 | E67,4315445.45,99554,18,3,2,0,157,25,12,1,0.030911901,0.106976744,2189387.65,5.055198896,0,0,0,0,1,0,2,B,否 69 | E68,5866711.11,89627.09,5,2,0,0,1,1,0,0,0,0.109589041,-2731449.09,0.364643359,0,0,0,0,1,0,3,C,否 70 | E69,3636062.74,109081.89,12,4,2,0,1,0,0,0,0,0.109028961,2661870.58,9.974805767,0,0,1,0,0,0,3,C,否 71 | E70,3595798.73,580622.56,39,8,5,3,138,29,13,7,0.022963368,0.188581315,-2036647.49,0.059261052,0,0,0,0,1,0,2,B,否 72 | E71,417041.18,0,260,18,4,0,214,48,19,0,0.042891183,0.042666667,5842485.55,-0.270946377,1,0,0,0,1,0,2,B,否 73 | E72,2943665.95,0,25,3,0,0,41,9,3,0,0.025641026,0.290322581,1453197.42,-13.39267755,1,0,0,0,1,0,3,C,否 74 | E73,5350093.41,909143.31,48,7,2,1,84,21,8,5,0.00990099,0.055084746,-1344863.84,0.494727487,0,0,0,0,1,0,3,C,否 75 | E74,3649790.04,0,8,6,3,2,7,1,0,0,0,0.077477477,1388223.66,6.797415479,0,0,0,0,1,0,2,B,否 76 | E75,3859608.51,26412.04,7451,9,2,1,34,12,4,3,0.025362319,0.068284229,-1700637.8,0.219843181,0,0,0,0,1,0,3,C,否 77 | E76,3156443.92,203043.27,58,4,9,2,78,16,7,1,0.011428571,0.089795918,940249.39,2.752940948,0,0,0,0,1,0,2,B,否 78 | E77,3607798.82,159249.17,0,1,1,0,13,2,0,1,0.018518519,0.023255814,-759119.19,0.402139273,0,0,0,0,1,0,3,C,否 79 | E78,6029427.49,260670.99,30,5,2,0,11,4,1,0,0,0.074626866,-2691870.88,0.187109816,0,0,0,0,0,1,3,C,否 80 | 
E79,363491.02,0,9,4,1,1,30,11,3,2,0.012987013,0.139534884,1034964.7,-1.507861627,1,0,0,0,1,0,2,B,否 81 | E80,1148651.19,27552.75,38,0,0,0,82,15,6,0,0.012658228,0.020833333,3297244.36,-5.026949012,1,0,0,0,1,0,3,C,否 82 | E81,1330514.4,221435.23,70,30,17,2,49,13,5,1,0.028436019,0.061633282,182894.31,1.580709088,0,0,0,0,1,0,1,A,否 83 | E82,1385376.45,238891.41,7,0,1,0,41,17,10,0,0.012698413,0.023323615,118211.17,1.384650542,0,0,0,0,1,0,4,D,是 84 | E83,-34876613.63,0,130,30,0,0,162,52,37,8,0.066089965,0.080736544,-11859813.95,3.545184778,0,0,0,0,1,0,2,B,否 85 | E84,3392431.19,521610.51,24,7,4,0,16,1,1,1,0.018518519,0.106425703,641060.92,1.908647944,0,0,0,0,1,0,1,A,否 86 | E85,2247393.78,55871.8,14,6,1,2,25,6,6,1,0.004366812,0.156996587,509292.93,3.370016672,0,0,0,0,1,0,2,B,否 87 | E86,411411.22,54856.36,3,4,0,0,29,6,1,0,0.007246377,0.136363636,-606338.28,0.224465111,0,0,0,1,0,0,3,C,否 88 | E87,2288881.09,240920.4,5,4,3,1,62,17,2,1,0.021108179,0.040540541,674008.58,3.983327704,0,0,0,0,1,0,3,C,是 89 | E88,1877583.81,160137.38,78,8,0,0,17,7,1,1,0,0.09,181422.33,1.393914056,0,0,0,0,1,0,1,A,否 90 | E89,-824452.26,159405.57,16,4,4,0,6,5,1,0,0.030769231,0.040697674,2630392.22,-0.244160693,1,0,0,0,1,0,1,A,否 91 | E90,2488785.48,167307.28,38,8,3,0,29,6,0,0,0.028776978,0.283524904,426767.62,1.71430123,0,0,0,0,1,0,3,C,否 92 | E91,930397.99,148985.33,65,7,1,0,20,8,6,1,0.050847458,0.046511628,267744.88,2.254806928,0,0,0,0,1,0,1,A,否 93 | E92,2060494.4,55579.19,11,2,0,0,73,12,4,0,0.004694836,0.076923077,1226750.25,4.818772443,0,0,0,0,1,0,3,C,否 94 | E93,1158753.81,26196.36,19,5,4,0,53,8,4,0,0.023474178,0.047297297,190487.11,1.773236926,0,0,0,0,1,0,2,B,否 95 | E94,1039150.21,29299.68,54,2,0,0,4,0,0,0,0,0.091836735,296547.2,2.761899454,0,0,0,0,1,0,3,C,否 96 | E95,2309683.68,69258.77,446,32,7,0,0,0,1,0,0,0.053669222,-173150.47,0.762139235,0,0,0,0,1,0,2,B,否 97 | E96,-880797.48,0,3,0,0,0,4,2,0,0,0,0.1,1348697.76,33.85250329,0,0,0,0,1,0,3,C,否 98 | 
E97,952646.12,28016.78,44,17,2,0,1,0,1,0,0,0.034090909,428086.28,2.842571436,0,0,1,0,0,0,2,B,否 99 | E98,1203728.22,31893.6,20,3,2,0,25,1,1,0,0.019607843,0.130718954,-106852.63,0.805678733,0,0,0,0,1,0,2,B,否 100 | E99,-5203665.22,0,1,1,0,0,73,7,1,0,0.010227273,0.375,-1114717.37,-1.215629991,0,1,0,0,1,0,4,D,是 101 | E100,650279.93,17886.26,9,2,1,1,15,5,1,0,0,0.01010101,-111912.9,0.641697941,0,0,0,0,1,0,4,D,是 102 | E101,294643.81,44576.19,2,0,0,0,4,2,0,0,0,0.492063492,-119859.57,0.421671447,0,0,0,0,1,0,4,D,是 103 | E102,-2841555.25,0,91,7,2,0,28,21,12,9,0.02739726,0.146892655,43268.35,0.951715913,0,0,0,0,1,0,4,D,是 104 | E103,990178.47,62712.09,16,3,0,0,56,7,2,0,0.01744186,0.15625,49319.54,1.085415704,0,0,0,0,1,0,4,D,是 105 | E104,262386.36,7863.64,10,0,0,0,1,0,0,0,0,0.095238095,103233.01,2.299433945,0,0,0,0,1,0,3,C,否 106 | E105,917382.45,26701.44,101,0,0,0,2,2,0,0,0,0.008196721,-134853.56,0.572421947,0,0,1,0,0,0,3,C,否 107 | E106,526975.73,15241.03,95,6,2,0,15,4,0,0,0.027777778,0.098039216,139940.23,2.157468663,0,0,0,0,1,0,2,B,否 108 | E107,682155.01,40934.99,2,0,1,0,5,2,0,0,0,0.37037037,600297.72,10.30893452,0,0,0,0,1,0,4,D,是 109 | E108,92174.11,0,18,1,0,0,51,18,1,0,0,0,-123849.11,0.035964429,0,0,0,0,1,0,4,D,是 110 | E109,439509.33,13926.67,16,1,0,0,5,2,0,0,0,0.303030303,274393.5,7.229384822,0,0,0,0,1,0,4,D,是 111 | E110,195956.47,5831.04,29,5,0,0,0,0,1,0,0,0.253012048,21642.65,1.78831463,0,0,1,0,0,0,3,C,否 112 | E111,124997.02,100848.16,5,0,1,3,74,15,4,2,0.050359712,0.184,-271144.93,-2.37302917,0,1,0,0,1,0,4,D,是 113 | E112,86946.74,19202.37,1,1,0,0,17,6,2,0,0.037735849,0.272727273,208206.57,2.69992051,0,0,0,0,1,0,4,D,是 114 | E113,-1006740.82,0,2,1,0,0,26,8,1,0,0.019230769,0.340425532,-82420.22,-0.038912999,0,1,0,0,1,0,4,D,是 115 | E114,-397912.31,0,19,1,3,0,15,4,1,0,0.092105263,0.12,-356779.9,-5.032003203,0,1,0,0,1,0,4,D,是 116 | E115,55005.68,1634.32,3,0,0,0,2,0,0,0,0,0.333333333,35728.16,4.706717462,0,0,0,0,1,0,4,D,是 117 | 
E116,228821.67,6742.4,24,1,0,0,8,0,0,0,0,0.085106383,-75814.26,0.066151003,0,0,0,0,1,0,4,D,是 118 | E117,516642.52,1405.48,0,1,0,0,3,0,0,0,0,0.409090909,211072.46,2.381499614,0,0,0,0,1,0,4,D,是 119 | E118,120774.03,0,111,5,0,0,7,2,0,0,0.068965517,0.055555556,-141118.15,-0.339462906,0,1,0,0,1,0,4,D,是 120 | E119,-184120.28,0,12,1,1,0,7,6,2,2,0.003174603,0.142857143,-68619.22,6.999116995,0,0,0,0,1,0,4,D,是 121 | E120,162416.14,3780.94,6,0,0,0,13,3,0,0,0.027777778,0.689655172,8830.95,1.139033405,0,0,0,0,1,0,4,D,是 122 | E121,-972024.16,0,110,6,0,0,7,2,2,0,0,0.123655914,690652.18,-0.075121715,1,0,0,0,1,0,4,D,是 123 | E122,7342.16,0,67,2,0,0,18,3,1,0,0.020833333,0.13559322,-27347.29,-0.681495628,0,1,0,0,1,0,4,D,是 124 | E123,199443.4,27276.3,3,1,1,0,1,1,0,0,0,0.492307692,17453.84,1.824360564,0,0,0,0,1,0,4,D,是 125 | -------------------------------------------------------------------------------- /第一问:作废发票计算.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 146, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "import matplotlib.pyplot as plt\n", 12 | "from datetime import datetime\n", 13 | "import time" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "# 进销口分析" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 2, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "input1 = pd.read_excel('附件1.xlsx',sheet_name='进项发票信息')\n", 30 | "output1 = pd.read_excel('附件1.xlsx',sheet_name='销项发票信息')" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 22, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "for num in range(1,124):\n", 40 | " id1 = 'E'+str(num)\n", 41 | " input1[input1['企业代号']==id1]" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 41, 47 | "metadata": {}, 48 | 
"outputs": [ 49 | { 50 | "data": { 51 | "text/plain": [ 52 | "0.05579773321708806" 53 | ] 54 | }, 55 | "execution_count": 41, 56 | "metadata": {}, 57 | "output_type": "execute_result" 58 | } 59 | ], 60 | "source": [ 61 | "num = 1\n", 62 | "id1 = 'E'+str(num)\n", 63 | "tem = input1[input1['企业代号']==id1]\n", 64 | "all_num = tem.shape[0]\n", 65 | "fei_num = tem[tem['发票状态']=='作废发票'].shape[0]\n", 66 | "ratio = fei_num/all_num\n", 67 | "ratio" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 47, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "r_in=[]\n", 77 | "\n", 78 | "for num in range(1,124):\n", 79 | " id1 = 'E'+str(num)\n", 80 | " tem = input1[input1['企业代号']==id1]\n", 81 | " all_num = tem.shape[0]\n", 82 | " fei_num = tem[tem['发票状态']=='作废发票'].shape[0]\n", 83 | " ratio = fei_num/all_num\n", 84 | " \n", 85 | " r_in.append(ratio)\n", 86 | " \n", 87 | "r_out=[]\n", 88 | "\n", 89 | "for num in range(1,124):\n", 90 | " id1 = 'E'+str(num)\n", 91 | " tem = output1[output1['企业代号']==id1]\n", 92 | " all_num = tem.shape[0]\n", 93 | " fei_num = tem[tem['发票状态']=='作废发票'].shape[0]\n", 94 | " ratio = fei_num/all_num\n", 95 | " \n", 96 | " r_out.append(ratio)\n", 97 | "\n", 98 | " \n", 99 | "\n", 100 | "name=[] \n", 101 | "for num in range(1,124):\n", 102 | " id1 = 'E'+str(num)\n", 103 | " name.append(id1)\n", 104 | " " 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 55, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "in_num=[]\n", 114 | "for num in range(1,124):\n", 115 | " id1 = 'E'+str(num)\n", 116 | " tem = input1[input1['企业代号']==id1]\n", 117 | " all_num = tem.shape[0]\n", 118 | " \n", 119 | " in_num.append(all_num)\n", 120 | " \n", 121 | "out_num=[]\n", 122 | "for num in range(1,124):\n", 123 | " id1 = 'E'+str(num)\n", 124 | " tem = output1[output1['企业代号']==id1]\n", 125 | " all_num = tem.shape[0]\n", 126 | " \n", 127 | " out_num.append(all_num)" 128 | ] 129 | }, 130 | { 131 | "cell_type": 
"code", 132 | "execution_count": 56, 133 | "metadata": {}, 134 | "outputs": [ 135 | { 136 | "data": { 137 | "text/html": [ 138 | "
\n", 139 | "\n", 152 | "\n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | "
企业名称进项发票的作废比例销项发票的作废比例进项发票数销项发票数
0E10.0557980.02762034418110
1E20.0224220.0820023215612707
2E30.0425350.015993456124073
3E40.0663080.0851645582231
4E50.0391890.05188721691060
..................
118E1190.0031750.14285731521
119E1200.0277780.6896553629
120E1210.0000000.12365650186
121E1220.0208330.13559348118
122E1230.0000000.492308365
\n", 254 | "

123 rows × 5 columns

\n", 255 | "
" 256 | ], 257 | "text/plain": [ 258 | " 企业名称 进项发票的作废比例 销项发票的作废比例 进项发票数 销项发票数\n", 259 | "0 E1 0.055798 0.027620 3441 8110\n", 260 | "1 E2 0.022422 0.082002 32156 12707\n", 261 | "2 E3 0.042535 0.015993 4561 24073\n", 262 | "3 E4 0.066308 0.085164 558 2231\n", 263 | "4 E5 0.039189 0.051887 2169 1060\n", 264 | ".. ... ... ... ... ...\n", 265 | "118 E119 0.003175 0.142857 315 21\n", 266 | "119 E120 0.027778 0.689655 36 29\n", 267 | "120 E121 0.000000 0.123656 50 186\n", 268 | "121 E122 0.020833 0.135593 48 118\n", 269 | "122 E123 0.000000 0.492308 3 65\n", 270 | "\n", 271 | "[123 rows x 5 columns]" 272 | ] 273 | }, 274 | "execution_count": 56, 275 | "metadata": {}, 276 | "output_type": "execute_result" 277 | } 278 | ], 279 | "source": [ 280 | "data = {'企业名称':name,\n", 281 | " '进项发票的作废比例':r_in ,\n", 282 | " '销项发票的作废比例':r_out ,\n", 283 | " '进项发票数':in_num ,\n", 284 | " '销项发票数':out_num\n", 285 | "}\n", 286 | "\n", 287 | "df=pd.DataFrame(data=data)\n", 288 | "df" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": 57, 294 | "metadata": {}, 295 | "outputs": [], 296 | "source": [ 297 | "df.to_csv('附件1作废发票比例.csv',encoding='gbk')" 298 | ] 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "# 企业文字提取" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 59, 310 | "metadata": {}, 311 | "outputs": [ 312 | { 313 | "data": { 314 | "text/html": [ 315 | "
\n", 316 | "\n", 329 | "\n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | "
企业代号企业名称信誉评级是否违约
0E1***电器销售有限公司A
1E2***技术有限责任公司A
2E3***电子(中国)有限公司***分公司C
3E4***发展有限责任公司C
4E5***供应链管理有限公司B
...............
118E119***药房D
119E120***陈列广告有限公司D
120E121***药业连锁有限公司***药店D
121E122***商贸有限责任公司D
122E123***创科技有限责任公司D
\n", 419 | "

123 rows × 4 columns

\n", 420 | "
" 421 | ], 422 | "text/plain": [ 423 | " 企业代号 企业名称 信誉评级 是否违约\n", 424 | "0 E1 ***电器销售有限公司 A 否\n", 425 | "1 E2 ***技术有限责任公司 A 否\n", 426 | "2 E3 ***电子(中国)有限公司***分公司 C 否\n", 427 | "3 E4 ***发展有限责任公司 C 否\n", 428 | "4 E5 ***供应链管理有限公司 B 否\n", 429 | ".. ... ... ... ...\n", 430 | "118 E119 ***药房 D 是\n", 431 | "119 E120 ***陈列广告有限公司 D 是\n", 432 | "120 E121 ***药业连锁有限公司***药店 D 是\n", 433 | "121 E122 ***商贸有限责任公司 D 是\n", 434 | "122 E123 ***创科技有限责任公司 D 是\n", 435 | "\n", 436 | "[123 rows x 4 columns]" 437 | ] 438 | }, 439 | "execution_count": 59, 440 | "metadata": {}, 441 | "output_type": "execute_result" 442 | } 443 | ], 444 | "source": [ 445 | "data = pd.read_excel('附件1.xlsx',sheet_name='企业信息')\n", 446 | "data" 447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": null, 452 | "metadata": {}, 453 | "outputs": [], 454 | "source": [] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": null, 459 | "metadata": {}, 460 | "outputs": [], 461 | "source": [] 462 | }, 463 | { 464 | "cell_type": "code", 465 | "execution_count": null, 466 | "metadata": {}, 467 | "outputs": [], 468 | "source": [] 469 | }, 470 | { 471 | "cell_type": "markdown", 472 | "metadata": {}, 473 | "source": [ 474 | "# 时间序列分析" 475 | ] 476 | }, 477 | { 478 | "cell_type": "code", 479 | "execution_count": 67, 480 | "metadata": {}, 481 | "outputs": [], 482 | "source": [ 483 | "for i in range(len(input1)): # 删除作废发票\n", 484 | " if input1.loc[i,'发票状态']=='作废发票':\n", 485 | " input1.drop(i,inplace=True)" 486 | ] 487 | }, 488 | { 489 | "cell_type": "code", 490 | "execution_count": 68, 491 | "metadata": {}, 492 | "outputs": [], 493 | "source": [ 494 | "for i in range(len(output1)):\n", 495 | " if output1.loc[i,'发票状态']=='作废发票':\n", 496 | " output1.drop(i,inplace=True)" 497 | ] 498 | }, 499 | { 500 | "cell_type": "code", 501 | "execution_count": 72, 502 | "metadata": {}, 503 | "outputs": [], 504 | "source": [ 505 | "input1.reset_index(drop = True,inplace=True)\n", 506 | "output1.reset_index(drop = 
True,inplace=True)" 507 | ] 508 | }, 509 | { 510 | "cell_type": "code", 511 | "execution_count": 73, 512 | "metadata": {}, 513 | "outputs": [], 514 | "source": [ 515 | "input1['开票日期'] = pd.to_datetime(input1['开票日期'])\n", 516 | "output1['开票日期'] = pd.to_datetime(output1['开票日期'])" 517 | ] 518 | }, 519 | { 520 | "cell_type": "code", 521 | "execution_count": 219, 522 | "metadata": {}, 523 | "outputs": [], 524 | "source": [ 525 | "deal1=[]\n", 526 | "deal2=[]\n", 527 | "deal3=[]\n", 528 | "deal4=[]\n", 529 | "deal5=[]\n", 530 | "for num in range(1,124): # 遍历目标企业\n", 531 | " l_1 = 0\n", 532 | " l_2 = 0\n", 533 | " l_3 = 0 # 一年、两年、三年、四年的交易方\n", 534 | " l_4 = 0\n", 535 | " l_5 = 0\n", 536 | "\n", 537 | " id1 = 'E'+str(num)\n", 538 | " a = input1[input1['企业代号']==id1]\n", 539 | " a = a.reset_index(drop=True)\n", 540 | " other = np.unique(a['销方单位代号']) # 交易方代号的唯一值\n", 541 | " \n", 542 | " year = a['开票日期'][len(a)-1].year - a['开票日期'][0].year+1 \n", 543 | " begin = a['开票日期'][0].year\n", 544 | " \n", 545 | " for i in range(len(other)): # 遍历交易企业\n", 546 | " cou=0\n", 547 | " company = other[i] # 选中的交易企业\n", 548 | " for j in range(year): # 遍历每一年\n", 549 | " tem = a[(a['开票日期'] >str(begin+j) )& (a['开票日期'] \n", 595 | "\n", 608 | "\n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 
668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | "
持续一年期的交易企业个数持续二年期的交易企业个数持续三年期的交易企业个数持续四年前的交易企业个数持续五年期的交易企业个数
0332742730
124247803141030
238610358230
394211320
4324471920
..................
11876220
119133000
12072200
121183100
12211000
\n", 710 | "

123 rows × 5 columns

\n", 711 | "" 712 | ], 713 | "text/plain": [ 714 | " 持续一年期的交易企业个数 持续二年期的交易企业个数 持续三年期的交易企业个数 持续四年前的交易企业个数 持续五年期的交易企业个数\n", 715 | "0 332 74 27 3 0\n", 716 | "1 2424 780 314 103 0\n", 717 | "2 386 103 58 23 0\n", 718 | "3 94 21 13 2 0\n", 719 | "4 324 47 19 2 0\n", 720 | ".. ... ... ... ... ...\n", 721 | "118 7 6 2 2 0\n", 722 | "119 13 3 0 0 0\n", 723 | "120 7 2 2 0 0\n", 724 | "121 18 3 1 0 0\n", 725 | "122 1 1 0 0 0\n", 726 | "\n", 727 | "[123 rows x 5 columns]" 728 | ] 729 | }, 730 | "execution_count": 223, 731 | "metadata": {}, 732 | "output_type": "execute_result" 733 | } 734 | ], 735 | "source": [ 736 | "jx = pd.DataFrame(data=da1)\n", 737 | "jx" 738 | ] 739 | }, 740 | { 741 | "cell_type": "code", 742 | "execution_count": 301, 743 | "metadata": {}, 744 | "outputs": [], 745 | "source": [ 746 | "jx.to_csv('上游持续交易企业数.csv',encoding='gbk')" 747 | ] 748 | }, 749 | { 750 | "cell_type": "code", 751 | "execution_count": 230, 752 | "metadata": {}, 753 | "outputs": [], 754 | "source": [ 755 | "deal1=[]\n", 756 | "deal2=[]\n", 757 | "deal3=[]\n", 758 | "deal4=[]\n", 759 | "for num in range(1,124): # 遍历目标企业\n", 760 | " l_1 = 0\n", 761 | " l_2 = 0\n", 762 | " l_3 = 0 # 一年、两年、三年、四年的交易方\n", 763 | " l_4 = 0\n", 764 | "\n", 765 | " id1 = 'E'+str(num)\n", 766 | " a = output1[output1['企业代号']==id1]\n", 767 | " a = a.reset_index(drop=True)\n", 768 | " other = np.unique(a['购方单位代号']) # 交易方代号的唯一值\n", 769 | " \n", 770 | " year = a['开票日期'][len(a)-1].year - a['开票日期'][0].year+1 \n", 771 | " begin = a['开票日期'][0].year\n", 772 | " \n", 773 | " for i in range(len(other)): # 遍历交易企业\n", 774 | " cou=0\n", 775 | " company = other[i] # 选中的交易企业\n", 776 | " for j in range(year): # 遍历每一年\n", 777 | " tem = a[(a['开票日期'] >str(begin+j) )& (a['开票日期'] \n", 807 | "\n", 820 | "\n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " 
\n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | "
持续一年期的交易企业个数持续二年期的交易企业个数持续三年期的交易企业个数持续四年前的交易企业个数
0179515171
1114726812143
248482514
3161000
4161520
...............
11812110
1196000
120110600
12167200
1223110
\n", 910 | "

123 rows × 4 columns

\n", 911 | "" 912 | ], 913 | "text/plain": [ 914 | " 持续一年期的交易企业个数 持续二年期的交易企业个数 持续三年期的交易企业个数 持续四年前的交易企业个数\n", 915 | "0 179 51 51 71\n", 916 | "1 1147 268 121 43\n", 917 | "2 48 48 25 14\n", 918 | "3 16 10 0 0\n", 919 | "4 16 15 2 0\n", 920 | ".. ... ... ... ...\n", 921 | "118 12 1 1 0\n", 922 | "119 6 0 0 0\n", 923 | "120 110 6 0 0\n", 924 | "121 67 2 0 0\n", 925 | "122 3 1 1 0\n", 926 | "\n", 927 | "[123 rows x 4 columns]" 928 | ] 929 | }, 930 | "execution_count": 231, 931 | "metadata": {}, 932 | "output_type": "execute_result" 933 | } 934 | ], 935 | "source": [ 936 | "da2= {'持续一年期的交易企业个数':deal1,\n", 937 | " '持续二年期的交易企业个数': deal2,\n", 938 | " '持续三年期的交易企业个数': deal3,\n", 939 | " '持续四年前的交易企业个数':deal4\n", 940 | " }\n", 941 | "\n", 942 | "xx = pd.DataFrame(data=da2)\n", 943 | "xx " 944 | ] 945 | }, 946 | { 947 | "cell_type": "code", 948 | "execution_count": 300, 949 | "metadata": {}, 950 | "outputs": [], 951 | "source": [ 952 | "xx.to_csv('下游持续交易企业.csv',encoding='gbk')" 953 | ] 954 | }, 955 | { 956 | "cell_type": "code", 957 | "execution_count": null, 958 | "metadata": {}, 959 | "outputs": [], 960 | "source": [] 961 | }, 962 | { 963 | "cell_type": "code", 964 | "execution_count": 233, 965 | "metadata": {}, 966 | "outputs": [], 967 | "source": [ 968 | "in_y=[]\n", 969 | "for num in range(1,124): # 遍历目标企业\n", 970 | " l_1 = 0\n", 971 | " l_2 = 0\n", 972 | " l_3 = 0 # 一年、两年、三年、四年的交易方\n", 973 | " l_4 = 0\n", 974 | " l_5 = 0\n", 975 | "\n", 976 | " id1 = 'E'+str(num)\n", 977 | " a = input1[input1['企业代号']==id1]\n", 978 | " a = a.reset_index(drop=True)\n", 979 | " other = np.unique(a['销方单位代号']) # 交易方代号的唯一值\n", 980 | " \n", 981 | " year = a['开票日期'][len(a)-1].year - a['开票日期'][0].year+1 \n", 982 | " in_y.append(year)\n", 983 | "\n", 984 | "\n", 985 | "\n", 986 | "out_y=[]\n", 987 | "for num in range(1,124): # 遍历目标企业\n", 988 | " l_1 = 0\n", 989 | " l_2 = 0\n", 990 | " l_3 = 0 # 一年、两年、三年、四年的交易方\n", 991 | " l_4 = 0\n", 992 | "\n", 993 | " id1 = 'E'+str(num)\n", 994 | " a = 
output1[output1['企业代号']==id1]\n", 995 | " a = a.reset_index(drop=True)\n", 996 | " other = np.unique(a['购方单位代号']) # 交易方代号的唯一值\n", 997 | " \n", 998 | " year = a['开票日期'][len(a)-1].year - a['开票日期'][0].year+1 \n", 999 | " out_y.append(year)" 1000 | ] 1001 | }, 1002 | { 1003 | "cell_type": "code", 1004 | "execution_count": null, 1005 | "metadata": {}, 1006 | "outputs": [], 1007 | "source": [] 1008 | }, 1009 | { 1010 | "cell_type": "code", 1011 | "execution_count": 245, 1012 | "metadata": {}, 1013 | "outputs": [], 1014 | "source": [ 1015 | "new_in = input1[(input1['开票日期']<'2020')] # 2020年之前的进项数据\n", 1016 | "new_out = output1[(output1['开票日期']<'2020')]" 1017 | ] 1018 | }, 1019 | { 1020 | "cell_type": "code", 1021 | "execution_count": 246, 1022 | "metadata": {}, 1023 | "outputs": [ 1024 | { 1025 | "data": { 1026 | "text/html": [ 1027 | "
\n", 1028 | "\n", 1041 | "\n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | "
企业代号发票号码开票日期销方单位代号金额税额价税合计发票状态
0E133909392017-07-18A00297-943.40-56.60-1000.00有效发票
1E133909402017-07-18A00297-4780.24-286.81-5067.05有效发票
2E133909412017-07-18A00297943.4056.601000.00有效发票
3E133909422017-07-18A002974780.24286.815067.05有效发票
4E199026692017-08-07A05061326.219.79336.00有效发票
...........................
203333E122420556392019-03-25A1333217636.091058.1718694.26有效发票
203334E122547062342019-04-17A08967223.306.70230.00有效发票
203336E123384932952017-12-15A03624264.1515.85280.00有效发票
203337E123954720012018-12-29A03626264.1515.85280.00有效发票
203338E123544698832019-12-18A03626264.1515.85280.00有效发票
\n", 1179 | "

198371 rows × 8 columns

\n", 1180 | "
" 1181 | ], 1182 | "text/plain": [ 1183 | " 企业代号 发票号码 开票日期 销方单位代号 金额 税额 价税合计 发票状态\n", 1184 | "0 E1 3390939 2017-07-18 A00297 -943.40 -56.60 -1000.00 有效发票\n", 1185 | "1 E1 3390940 2017-07-18 A00297 -4780.24 -286.81 -5067.05 有效发票\n", 1186 | "2 E1 3390941 2017-07-18 A00297 943.40 56.60 1000.00 有效发票\n", 1187 | "3 E1 3390942 2017-07-18 A00297 4780.24 286.81 5067.05 有效发票\n", 1188 | "4 E1 9902669 2017-08-07 A05061 326.21 9.79 336.00 有效发票\n", 1189 | "... ... ... ... ... ... ... ... ...\n", 1190 | "203333 E122 42055639 2019-03-25 A13332 17636.09 1058.17 18694.26 有效发票\n", 1191 | "203334 E122 54706234 2019-04-17 A08967 223.30 6.70 230.00 有效发票\n", 1192 | "203336 E123 38493295 2017-12-15 A03624 264.15 15.85 280.00 有效发票\n", 1193 | "203337 E123 95472001 2018-12-29 A03626 264.15 15.85 280.00 有效发票\n", 1194 | "203338 E123 54469883 2019-12-18 A03626 264.15 15.85 280.00 有效发票\n", 1195 | "\n", 1196 | "[198371 rows x 8 columns]" 1197 | ] 1198 | }, 1199 | "execution_count": 246, 1200 | "metadata": {}, 1201 | "output_type": "execute_result" 1202 | } 1203 | ], 1204 | "source": [ 1205 | "new_in" 1206 | ] 1207 | }, 1208 | { 1209 | "cell_type": "code", 1210 | "execution_count": 292, 1211 | "metadata": {}, 1212 | "outputs": [], 1213 | "source": [ 1214 | "ur = []\n", 1215 | "\n", 1216 | "dd = []\n", 1217 | "for num in range(1,124): # 遍历目标企业\n", 1218 | " \n", 1219 | " id1 = 'E'+str(num)\n", 1220 | " a = new_in[new_in['企业代号']==id1]\n", 1221 | " a = a.reset_index(drop=True)\n", 1222 | " \n", 1223 | " end_1 = a['开票日期'][len(a)-1].year\n", 1224 | " begin_1 = a['开票日期'][0].year\n", 1225 | " \n", 1226 | " begin_ji_1 = a[(a['开票日期']>str(begin_1))&(a['开票日期']str(end_1))&(a['开票日期']str(begin_2))&(b['开票日期']str(end_2))&(b['开票日期']\n", 1399 | "\n", 1412 | "\n", 1413 | " \n", 1414 | " \n", 1415 | " \n", 1416 | " \n", 1417 | " \n", 1418 | " \n", 1419 | " \n", 1420 | " \n", 1421 | " \n", 1422 | " \n", 1423 | " \n", 1424 | " \n", 1425 | " \n", 1426 | " \n", 1427 | " \n", 1428 | " \n", 1429 | " \n", 1430 | " \n", 1431 
| " \n", 1432 | " \n", 1433 | " \n", 1434 | " \n", 1435 | " \n", 1436 | " \n", 1437 | " \n", 1438 | " \n", 1439 | " \n", 1440 | " \n", 1441 | " \n", 1442 | " \n", 1443 | " \n", 1444 | " \n", 1445 | " \n", 1446 | " \n", 1447 | " \n", 1448 | " \n", 1449 | " \n", 1450 | " \n", 1451 | " \n", 1452 | " \n", 1453 | " \n", 1454 | " \n", 1455 | " \n", 1456 | " \n", 1457 | " \n", 1458 | " \n", 1459 | " \n", 1460 | " \n", 1461 | " \n", 1462 | " \n", 1463 | " \n", 1464 | " \n", 1465 | " \n", 1466 | " \n", 1467 | " \n", 1468 | " \n", 1469 | " \n", 1470 | " \n", 1471 | " \n", 1472 | " \n", 1473 | " \n", 1474 | " \n", 1475 | " \n", 1476 | " \n", 1477 | "
绝对数变化比例变化
0-1.938721e+081.720614
11.103025e+082.197674
2-1.154773e+080.516353
3-1.168509e+090.053642
44.068217e+0685.227087
.........
118-6.861922e+046.999117
1198.830950e+031.139033
1206.906522e+05-0.075122
121-2.734729e+04-0.681496
1221.745384e+041.824361
\n", 1478 | "

123 rows × 2 columns

\n", 1479 | "" 1480 | ], 1481 | "text/plain": [ 1482 | " 绝对数变化 比例变化\n", 1483 | "0 -1.938721e+08 1.720614\n", 1484 | "1 1.103025e+08 2.197674\n", 1485 | "2 -1.154773e+08 0.516353\n", 1486 | "3 -1.168509e+09 0.053642\n", 1487 | "4 4.068217e+06 85.227087\n", 1488 | ".. ... ...\n", 1489 | "118 -6.861922e+04 6.999117\n", 1490 | "119 8.830950e+03 1.139033\n", 1491 | "120 6.906522e+05 -0.075122\n", 1492 | "121 -2.734729e+04 -0.681496\n", 1493 | "122 1.745384e+04 1.824361\n", 1494 | "\n", 1495 | "[123 rows x 2 columns]" 1496 | ] 1497 | }, 1498 | "execution_count": 294, 1499 | "metadata": {}, 1500 | "output_type": "execute_result" 1501 | } 1502 | ], 1503 | "source": [ 1504 | "data3 = {'绝对数变化':dd,\n", 1505 | " '比例变化':ur}\n", 1506 | "df3 = pd.DataFrame(data=data3)\n", 1507 | "df3" 1508 | ] 1509 | }, 1510 | { 1511 | "cell_type": "code", 1512 | "execution_count": 297, 1513 | "metadata": { 1514 | "scrolled": true 1515 | }, 1516 | "outputs": [ 1517 | { 1518 | "data": { 1519 | "text/html": [ 1520 | "
\n", 1521 | "\n", 1534 | "\n", 1535 | " \n", 1536 | " \n", 1537 | " \n", 1538 | " \n", 1539 | " \n", 1540 | " \n", 1541 | " \n", 1542 | " \n", 1543 | " \n", 1544 | " \n", 1545 | " \n", 1546 | " \n", 1547 | " \n", 1548 | " \n", 1549 | " \n", 1550 | " \n", 1551 | " \n", 1552 | " \n", 1553 | " \n", 1554 | " \n", 1555 | " \n", 1556 | " \n", 1557 | " \n", 1558 | " \n", 1559 | " \n", 1560 | " \n", 1561 | " \n", 1562 | " \n", 1563 | " \n", 1564 | " \n", 1565 | " \n", 1566 | " \n", 1567 | " \n", 1568 | " \n", 1569 | " \n", 1570 | " \n", 1571 | " \n", 1572 | " \n", 1573 | " \n", 1574 | " \n", 1575 | " \n", 1576 | " \n", 1577 | " \n", 1578 | " \n", 1579 | " \n", 1580 | " \n", 1581 | " \n", 1582 | " \n", 1583 | " \n", 1584 | " \n", 1585 | " \n", 1586 | " \n", 1587 | " \n", 1588 | " \n", 1589 | " \n", 1590 | " \n", 1591 | " \n", 1592 | " \n", 1593 | " \n", 1594 | " \n", 1595 | " \n", 1596 | " \n", 1597 | " \n", 1598 | " \n", 1599 | " \n", 1600 | " \n", 1601 | " \n", 1602 | " \n", 1603 | " \n", 1604 | " \n", 1605 | " \n", 1606 | " \n", 1607 | " \n", 1608 | " \n", 1609 | " \n", 1610 | " \n", 1611 | " \n", 1612 | " \n", 1613 | " \n", 1614 | " \n", 1615 | " \n", 1616 | " \n", 1617 | " \n", 1618 | " \n", 1619 | " \n", 1620 | " \n", 1621 | " \n", 1622 | " \n", 1623 | "
绝对数变化比例变化是否扭亏为盈利是否变为亏损
0-1.938721e+081.7206140.00.0
11.103025e+082.1976740.00.0
2-1.154773e+080.5163530.00.0
3-1.168509e+090.0536420.00.0
44.068217e+0685.2270870.00.0
...............
118-6.861922e+046.9991170.00.0
1198.830950e+031.1390330.00.0
1206.906522e+05-0.0751221.00.0
121-2.734729e+04-0.6814960.01.0
1221.745384e+041.8243610.00.0
\n", 1624 | "

123 rows × 4 columns

\n", 1625 | "
" 1626 | ], 1627 | "text/plain": [ 1628 | " 绝对数变化 比例变化 是否扭亏为盈利 是否变为亏损\n", 1629 | "0 -1.938721e+08 1.720614 0.0 0.0\n", 1630 | "1 1.103025e+08 2.197674 0.0 0.0\n", 1631 | "2 -1.154773e+08 0.516353 0.0 0.0\n", 1632 | "3 -1.168509e+09 0.053642 0.0 0.0\n", 1633 | "4 4.068217e+06 85.227087 0.0 0.0\n", 1634 | ".. ... ... ... ...\n", 1635 | "118 -6.861922e+04 6.999117 0.0 0.0\n", 1636 | "119 8.830950e+03 1.139033 0.0 0.0\n", 1637 | "120 6.906522e+05 -0.075122 1.0 0.0\n", 1638 | "121 -2.734729e+04 -0.681496 0.0 1.0\n", 1639 | "122 1.745384e+04 1.824361 0.0 0.0\n", 1640 | "\n", 1641 | "[123 rows x 4 columns]" 1642 | ] 1643 | }, 1644 | "execution_count": 297, 1645 | "metadata": {}, 1646 | "output_type": "execute_result" 1647 | } 1648 | ], 1649 | "source": [ 1650 | "for i in range(len(df3)):\n", 1651 | " if (df3.loc[i,'绝对数变化']>0) & (df3.loc[i,'比例变化']<0): # 说明扭亏为盈\n", 1652 | " df3.loc[i,'是否扭亏为盈利']=1\n", 1653 | " else:\n", 1654 | " df3.loc[i,'是否扭亏为盈利']=0\n", 1655 | "\n", 1656 | "for i in range(len(df3)):\n", 1657 | " if (df3.loc[i,'绝对数变化']<0) & (df3.loc[i,'比例变化']<0): # 说明扭亏为盈\n", 1658 | " df3.loc[i,'是否变为亏损']=1\n", 1659 | " else:\n", 1660 | " df3.loc[i,'是否变为亏损']=0 \n", 1661 | "\n", 1662 | "df3" 1663 | ] 1664 | }, 1665 | { 1666 | "cell_type": "code", 1667 | "execution_count": 299, 1668 | "metadata": {}, 1669 | "outputs": [], 1670 | "source": [ 1671 | "df3.to_csv('发展程度.csv',encoding='gbk')" 1672 | ] 1673 | }, 1674 | { 1675 | "cell_type": "code", 1676 | "execution_count": 290, 1677 | "metadata": {}, 1678 | "outputs": [], 1679 | "source": [ 1680 | "num=5\n", 1681 | "id1 = 'E'+str(num)\n", 1682 | "a = new_in[new_in['企业代号']==id1]\n", 1683 | "a = a.reset_index(drop=True)\n", 1684 | " \n", 1685 | "end_1 = a['开票日期'][len(a)-1].year\n", 1686 | "begin_1 = a['开票日期'][0].year\n", 1687 | " \n", 1688 | "begin_ji_1 = a[(a['开票日期']>str(begin_1))&(a['开票日期']str(end_1))&(a['开票日期']str(begin_2))&(b['开票日期']str(end_2))&(b['开票日期']\n", 1957 | "\n", 1970 | "\n", 1971 | " \n", 1972 | " \n", 1973 | " \n", 1974 
| " \n", 1975 | " \n", 1976 | " \n", 1977 | " \n", 1978 | " \n", 1979 | " \n", 1980 | " \n", 1981 | " \n", 1982 | " \n", 1983 | " \n", 1984 | " \n", 1985 | " \n", 1986 | " \n", 1987 | " \n", 1988 | " \n", 1989 | " \n", 1990 | " \n", 1991 | " \n", 1992 | " \n", 1993 | " \n", 1994 | " \n", 1995 | " \n", 1996 | " \n", 1997 | " \n", 1998 | " \n", 1999 | " \n", 2000 | " \n", 2001 | " \n", 2002 | " \n", 2003 | " \n", 2004 | " \n", 2005 | " \n", 2006 | " \n", 2007 | " \n", 2008 | " \n", 2009 | " \n", 2010 | " \n", 2011 | " \n", 2012 | " \n", 2013 | " \n", 2014 | " \n", 2015 | " \n", 2016 | " \n", 2017 | " \n", 2018 | " \n", 2019 | " \n", 2020 | " \n", 2021 | " \n", 2022 | " \n", 2023 | " \n", 2024 | " \n", 2025 | " \n", 2026 | " \n", 2027 | " \n", 2028 | " \n", 2029 | " \n", 2030 | " \n", 2031 | " \n", 2032 | " \n", 2033 | " \n", 2034 | " \n", 2035 | " \n", 2036 | " \n", 2037 | " \n", 2038 | " \n", 2039 | " \n", 2040 | " \n", 2041 | " \n", 2042 | " \n", 2043 | " \n", 2044 | " \n", 2045 | " \n", 2046 | " \n", 2047 | "
企业名称进项发票的作废比例销项发票的作废比例
0E1240.1233130.150039
1E1250.1238170.134796
2E1260.0337710.166227
3E1270.0179310.030303
4E1280.0273120.074900
............
297E4210.0000000.034483
298E4220.0000000.100000
299E4230.0000000.142857
300E4240.0000000.139535
301E4250.0169490.444444
\n", 2048 | "

302 rows × 3 columns

\n", 2049 | "" 2050 | ], 2051 | "text/plain": [ 2052 | " 企业名称 进项发票的作废比例 销项发票的作废比例\n", 2053 | "0 E124 0.123313 0.150039\n", 2054 | "1 E125 0.123817 0.134796\n", 2055 | "2 E126 0.033771 0.166227\n", 2056 | "3 E127 0.017931 0.030303\n", 2057 | "4 E128 0.027312 0.074900\n", 2058 | ".. ... ... ...\n", 2059 | "297 E421 0.000000 0.034483\n", 2060 | "298 E422 0.000000 0.100000\n", 2061 | "299 E423 0.000000 0.142857\n", 2062 | "300 E424 0.000000 0.139535\n", 2063 | "301 E425 0.016949 0.444444\n", 2064 | "\n", 2065 | "[302 rows x 3 columns]" 2066 | ] 2067 | }, 2068 | "execution_count": 10, 2069 | "metadata": {}, 2070 | "output_type": "execute_result" 2071 | } 2072 | ], 2073 | "source": [ 2074 | "data = {'企业名称':name,\n", 2075 | " '进项发票的作废比例':r_in ,\n", 2076 | " '销项发票的作废比例':r_out \n", 2077 | "}\n", 2078 | "\n", 2079 | "df=pd.DataFrame(data=data)\n", 2080 | "df" 2081 | ] 2082 | }, 2083 | { 2084 | "cell_type": "code", 2085 | "execution_count": 11, 2086 | "metadata": {}, 2087 | "outputs": [], 2088 | "source": [ 2089 | "df.to_csv('第二问作废比例.csv',encoding='gbk')" 2090 | ] 2091 | }, 2092 | { 2093 | "cell_type": "code", 2094 | "execution_count": 12, 2095 | "metadata": {}, 2096 | "outputs": [], 2097 | "source": [ 2098 | "new_in = input1[(input1['开票日期']<'2020')] # 2020年之前的进项数据\n", 2099 | "new_out = output1[(output1['开票日期']<'2020')]" 2100 | ] 2101 | }, 2102 | { 2103 | "cell_type": "code", 2104 | "execution_count": 13, 2105 | "metadata": {}, 2106 | "outputs": [], 2107 | "source": [ 2108 | "ur = []\n", 2109 | "\n", 2110 | "dd = []\n", 2111 | "for num in range(1,303): # 遍历目标企业\n", 2112 | " \n", 2113 | " id1 = 'E'+str(num+123)\n", 2114 | " a = new_in[new_in['企业代号']==id1]\n", 2115 | " a = a.reset_index(drop=True)\n", 2116 | " \n", 2117 | " end_1 = a['开票日期'][len(a)-1].year\n", 2118 | " begin_1 = a['开票日期'][0].year\n", 2119 | " \n", 2120 | " begin_ji_1 = a[(a['开票日期']>str(begin_1))&(a['开票日期']str(end_1))&(a['开票日期']str(begin_2))&(b['开票日期']str(end_2))&(b['开票日期']\n", 2151 | "\n", 2164 | "\n", 2165 | " 
\n", 2166 | " \n", 2167 | " \n", 2168 | " \n", 2169 | " \n", 2170 | " \n", 2171 | " \n", 2172 | " \n", 2173 | " \n", 2174 | " \n", 2175 | " \n", 2176 | " \n", 2177 | " \n", 2178 | " \n", 2179 | " \n", 2180 | " \n", 2181 | " \n", 2182 | " \n", 2183 | " \n", 2184 | " \n", 2185 | " \n", 2186 | " \n", 2187 | " \n", 2188 | " \n", 2189 | " \n", 2190 | " \n", 2191 | " \n", 2192 | " \n", 2193 | " \n", 2194 | " \n", 2195 | " \n", 2196 | " \n", 2197 | " \n", 2198 | " \n", 2199 | " \n", 2200 | " \n", 2201 | " \n", 2202 | " \n", 2203 | " \n", 2204 | " \n", 2205 | " \n", 2206 | " \n", 2207 | " \n", 2208 | " \n", 2209 | " \n", 2210 | " \n", 2211 | " \n", 2212 | " \n", 2213 | " \n", 2214 | " \n", 2215 | " \n", 2216 | " \n", 2217 | " \n", 2218 | " \n", 2219 | " \n", 2220 | " \n", 2221 | " \n", 2222 | " \n", 2223 | " \n", 2224 | " \n", 2225 | " \n", 2226 | " \n", 2227 | " \n", 2228 | " \n", 2229 | "
绝对数变化比例变化
0-1.065813e+08-2.086819
1-1.697853e+08-1.069323
21.540513e+082.527125
3-8.365728e+070.677000
48.388073e+072.918189
.........
297-5.554860e+040.110407
2981.892226e+046.340986
2991.143155e+040.214670
300-8.175862e+042.007337
301-1.218773e+05-0.132198
\n", 2230 | "

302 rows × 2 columns

\n", 2231 | "" 2232 | ], 2233 | "text/plain": [ 2234 | " 绝对数变化 比例变化\n", 2235 | "0 -1.065813e+08 -2.086819\n", 2236 | "1 -1.697853e+08 -1.069323\n", 2237 | "2 1.540513e+08 2.527125\n", 2238 | "3 -8.365728e+07 0.677000\n", 2239 | "4 8.388073e+07 2.918189\n", 2240 | ".. ... ...\n", 2241 | "297 -5.554860e+04 0.110407\n", 2242 | "298 1.892226e+04 6.340986\n", 2243 | "299 1.143155e+04 0.214670\n", 2244 | "300 -8.175862e+04 2.007337\n", 2245 | "301 -1.218773e+05 -0.132198\n", 2246 | "\n", 2247 | "[302 rows x 2 columns]" 2248 | ] 2249 | }, 2250 | "execution_count": 14, 2251 | "metadata": {}, 2252 | "output_type": "execute_result" 2253 | } 2254 | ], 2255 | "source": [ 2256 | "data3 = {'绝对数变化':dd,\n", 2257 | " '比例变化':ur}\n", 2258 | "df3 = pd.DataFrame(data=data3)\n", 2259 | "df3" 2260 | ] 2261 | }, 2262 | { 2263 | "cell_type": "code", 2264 | "execution_count": 15, 2265 | "metadata": {}, 2266 | "outputs": [ 2267 | { 2268 | "data": { 2269 | "text/html": [ 2270 | "
\n", 2271 | "\n", 2284 | "\n", 2285 | " \n", 2286 | " \n", 2287 | " \n", 2288 | " \n", 2289 | " \n", 2290 | " \n", 2291 | " \n", 2292 | " \n", 2293 | " \n", 2294 | " \n", 2295 | " \n", 2296 | " \n", 2297 | " \n", 2298 | " \n", 2299 | " \n", 2300 | " \n", 2301 | " \n", 2302 | " \n", 2303 | " \n", 2304 | " \n", 2305 | " \n", 2306 | " \n", 2307 | " \n", 2308 | " \n", 2309 | " \n", 2310 | " \n", 2311 | " \n", 2312 | " \n", 2313 | " \n", 2314 | " \n", 2315 | " \n", 2316 | " \n", 2317 | " \n", 2318 | " \n", 2319 | " \n", 2320 | " \n", 2321 | " \n", 2322 | " \n", 2323 | " \n", 2324 | " \n", 2325 | " \n", 2326 | " \n", 2327 | " \n", 2328 | " \n", 2329 | " \n", 2330 | " \n", 2331 | " \n", 2332 | " \n", 2333 | " \n", 2334 | " \n", 2335 | " \n", 2336 | " \n", 2337 | " \n", 2338 | " \n", 2339 | " \n", 2340 | " \n", 2341 | " \n", 2342 | " \n", 2343 | " \n", 2344 | " \n", 2345 | " \n", 2346 | " \n", 2347 | " \n", 2348 | " \n", 2349 | " \n", 2350 | " \n", 2351 | " \n", 2352 | " \n", 2353 | " \n", 2354 | " \n", 2355 | " \n", 2356 | " \n", 2357 | " \n", 2358 | " \n", 2359 | " \n", 2360 | " \n", 2361 | " \n", 2362 | " \n", 2363 | " \n", 2364 | " \n", 2365 | " \n", 2366 | " \n", 2367 | " \n", 2368 | " \n", 2369 | " \n", 2370 | " \n", 2371 | " \n", 2372 | " \n", 2373 | "
绝对数变化比例变化是否扭亏为盈利是否变为亏损
0-1.065813e+08-2.0868190.01.0
1-1.697853e+08-1.0693230.01.0
21.540513e+082.5271250.00.0
3-8.365728e+070.6770000.00.0
48.388073e+072.9181890.00.0
...............
297-5.554860e+040.1104070.00.0
2981.892226e+046.3409860.00.0
2991.143155e+040.2146700.00.0
300-8.175862e+042.0073370.00.0
301-1.218773e+05-0.1321980.01.0
\n", 2374 | "

302 rows × 4 columns

\n", 2375 | "
" 2376 | ], 2377 | "text/plain": [ 2378 | " 绝对数变化 比例变化 是否扭亏为盈利 是否变为亏损\n", 2379 | "0 -1.065813e+08 -2.086819 0.0 1.0\n", 2380 | "1 -1.697853e+08 -1.069323 0.0 1.0\n", 2381 | "2 1.540513e+08 2.527125 0.0 0.0\n", 2382 | "3 -8.365728e+07 0.677000 0.0 0.0\n", 2383 | "4 8.388073e+07 2.918189 0.0 0.0\n", 2384 | ".. ... ... ... ...\n", 2385 | "297 -5.554860e+04 0.110407 0.0 0.0\n", 2386 | "298 1.892226e+04 6.340986 0.0 0.0\n", 2387 | "299 1.143155e+04 0.214670 0.0 0.0\n", 2388 | "300 -8.175862e+04 2.007337 0.0 0.0\n", 2389 | "301 -1.218773e+05 -0.132198 0.0 1.0\n", 2390 | "\n", 2391 | "[302 rows x 4 columns]" 2392 | ] 2393 | }, 2394 | "execution_count": 15, 2395 | "metadata": {}, 2396 | "output_type": "execute_result" 2397 | } 2398 | ], 2399 | "source": [ 2400 | "for i in range(len(df3)):\n", 2401 | " if (df3.loc[i,'绝对数变化']>0) & (df3.loc[i,'比例变化']<0): # 说明扭亏为盈\n", 2402 | " df3.loc[i,'是否扭亏为盈利']=1\n", 2403 | " else:\n", 2404 | " df3.loc[i,'是否扭亏为盈利']=0\n", 2405 | "\n", 2406 | "for i in range(len(df3)):\n", 2407 | " if (df3.loc[i,'绝对数变化']<0) & (df3.loc[i,'比例变化']<0): # 说明变为亏损\n", 2408 | " df3.loc[i,'是否变为亏损']=1\n", 2409 | " else:\n", 2410 | " df3.loc[i,'是否变为亏损']=0 \n", 2411 | "\n", 2412 | "df3" 2413 | ] 2414 | }, 2415 | { 2416 | "cell_type": "code", 2417 | "execution_count": 16, 2418 | "metadata": {}, 2419 | "outputs": [], 2420 | "source": [ 2421 | "df3.to_csv('第二问绝对数和比例变化.csv',encoding='gbk')" 2422 | ] 2423 | }, 2424 | { 2425 | "cell_type": "code", 2426 | "execution_count": null, 2427 | "metadata": {}, 2428 | "outputs": [], 2429 | "source": [] 2430 | } 2431 | ], 2432 | "metadata": { 2433 | "kernelspec": { 2434 | "display_name": "Python [conda env:root] *", 2435 | "language": "python", 2436 | "name": "conda-root-py" 2437 | }, 2438 | "language_info": { 2439 | "codemirror_mode": { 2440 | "name": "ipython", 2441 | "version": 3 2442 | }, 2443 | "file_extension": ".py", 2444 | "mimetype": "text/x-python", 2445 | "name": "python", 2446 | "nbconvert_exporter": "python", 2447 | 
"pygments_lexer": "ipython3", 2448 | "version": "3.7.6" 2449 | }, 2450 | "toc": { 2451 | "base_numbering": 1, 2452 | "nav_menu": {}, 2453 | "number_sections": true, 2454 | "sideBar": true, 2455 | "skip_h1_title": false, 2456 | "title_cell": "Table of Contents", 2457 | "title_sidebar": "Contents", 2458 | "toc_cell": false, 2459 | "toc_position": {}, 2460 | "toc_section_display": true, 2461 | "toc_window_display": false 2462 | } 2463 | }, 2464 | "nbformat": 4, 2465 | "nbformat_minor": 4 2466 | } 2467 | -------------------------------------------------------------------------------- /论文.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leek-emperor/2020-CMCM-C/1d8a26c3a095726d45e107581c4bb204992b6e28/论文.pdf --------------------------------------------------------------------------------