├── README.md
├── baseline.ipynb
├── 第一、二问集成学习算法.ipynb
├── 第一问信用评级的描述性统计.ipynb
├── 第一问所有特征.csv
├── 第一问风险违约率求解.ipynb
├── 第一问:作废发票计算.ipynb
└── 论文.pdf
/README.md:
--------------------------------------------------------------------------------
1 | # 2020-CMCM-C
2 | 2020年数学建模国赛C题国一优秀论文——编号C227:银行对中小微企业的信贷决策分析
3 |
4 | # 比赛的代码
5 | 这个项目包含了C227这篇论文的代码和特征工程之后的数据集。特征工程的全部代码我找不到了,所以我放上了第一问特征工程之后的csv文件(`第一问所有特征.csv`,位于仓库根目录)。
6 | 论文中有比赛期间我们的全部代码,***但是我不确定有没有问题hhh***
7 | 已知的一个问题是:模型训练部分在评估时用了全部数据集(包含训练数据),而不是只用测试集,所以论文里的评估指标会偏乐观。
8 |
9 | # 更新后的代码(baseline.ipynb)
10 | baseline中的代码是更改之后的代码:先划分训练集和测试集,再只对训练集使用SMOTETomek(一种过采样+欠采样结合的方法)处理类别不平衡。
11 | Stacking融合之后在测试集上的AUC可以达到***0.98***左右,F1在0.89左右(基分类器选的是lgb、rf和gbdt,元分类器是SVC),其实效果不算很好,
12 | 但对于银行信贷来说,recall是很重要的,所幸stacking后的recall是1。
13 |
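14 | 我也尝试用了lgb单模型(设置is_unbalance=True),当时看起来在非平衡数据上lgb还是非常给力的。注意:notebook里保存的单模型结果中,test_Accuracy、Precision、Recall和f1是用上一个单元格遗留的y_predict(Stacking的预测)算出来的,只有train_Accuracy和AUC(约0.81)真正来自lgb单模型,所以这个结论需要重新运行后再确认。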
15 |
16 | 希望这些代码能给你提供帮助
17 |
--------------------------------------------------------------------------------
/baseline.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 60,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import numpy as np\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "import seaborn as sns\n",
13 | "from collections import Counter\n",
14 | "from imblearn.combine import SMOTETomek # 过采样+欠采样结合的方法处理非平衡数据集\n",
15 | "\n",
16 | "from sklearn.linear_model import LogisticRegression\n",
17 | "from sklearn.metrics import precision_score,recall_score\n",
18 | "from sklearn.model_selection import train_test_split\n",
19 | "from sklearn.preprocessing import StandardScaler\n",
20 | "from sklearn.model_selection import GridSearchCV\n",
21 | "from sklearn import metrics\n",
22 | "from sklearn.ensemble import AdaBoostClassifier as ada\n",
23 | "from sklearn.ensemble import GradientBoostingClassifier\n",
24 | "from sklearn.svm import SVC\n",
25 | "from sklearn.ensemble import RandomForestClassifier as RF\n",
26 | "from sklearn.model_selection import cross_val_score\n",
27 | "from sklearn.metrics import roc_auc_score\n",
28 | "from sklearn.ensemble import VotingClassifier\n",
29 | "from mlxtend.classifier import StackingClassifier\n",
30 | "from mlxtend.classifier import StackingCVClassifier\n",
31 | "\n",
32 | "# Voting \n",
33 | "data = pd.read_csv('第一问所有特征.csv',encoding='utf-8',index_col='企业代号')\n",
34 | "# Encode the target column '违约': 0.0 where '是否违约' is '否', 1.0 otherwise.\n",
35 | "# Done as one vectorised comparison instead of looping over labels E1..En,\n",
36 | "# so it no longer raises KeyError when the index is not exactly E1..En.\n",
37 | "# astype(float) keeps the float dtype the row-by-row .loc assignment produced.\n",
38 | "is_default = data['是否违约'] != '否'\n",
39 | "data['违约'] = is_default.astype(float)\n",
40 | "\n",
41 | "x = data.iloc[:,:-4].values\n",
42 | "y = data.iloc[:,-1].values"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 61,
48 | "metadata": {},
49 | "outputs": [],
50 | "source": [
51 | "x_train,x_test,y_train,y_test=train_test_split(x,y,random_state=30)\n",
52 | "smote_tomek = SMOTETomek(random_state=0)\n",
53 | "x_train, y_train = smote_tomek.fit_resample(x_train, y_train)"
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": 62,
59 | "metadata": {},
60 | "outputs": [],
61 | "source": [
62 | "from lightgbm import LGBMClassifier\n",
63 | "\n",
64 | "lgb = LGBMClassifier(learning_rate=0.05,n_estimators=100,objective='binary',\n",
65 | " boosting_type='gbdt',\n",
66 | " num_leaves=2**5,\n",
67 | " max_depth=5,reg_alpha=0.5,reg_lambda=0.5,\n",
68 | " metric='auc',subsample=0.75)\n",
69 | "\n",
70 | "LR = LogisticRegression(C=0.1, class_weight=None, dual=False, fit_intercept=True,\n",
71 | " intercept_scaling=1, l1_ratio=None, max_iter=100,\n",
72 | " multi_class='auto', n_jobs=None, penalty='l2',\n",
73 | " random_state=None, solver='newton-cg', tol=0.0001, verbose=0,\n",
74 | " warm_start=False)\n",
75 | "Ada = ada(algorithm='SAMME', base_estimator=None, learning_rate=0.1,\n",
76 | " n_estimators=100, random_state=30)\n",
77 | "GBDT = GradientBoostingClassifier(ccp_alpha=0.0, learning_rate=0.7, max_depth=3)\n",
78 | "svc = SVC(C=0.8, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,\n",
79 | " decision_function_shape='ovr', degree=3, gamma=20, kernel='rbf',\n",
80 | " max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,\n",
81 | " verbose=False)\n",
82 | "\n",
83 | "rf = RF(bootstrap=True, ccp_alpha=0.0, class_weight=None,\n",
84 | " criterion='gini', max_depth=None, max_features='auto',\n",
85 | " max_leaf_nodes=None, max_samples=None,\n",
86 | " min_impurity_decrease=0.0,min_samples_leaf=1, min_samples_split=2,\n",
87 | " min_weight_fraction_leaf=0.0, n_estimators=100,\n",
88 | " n_jobs=None, oob_score=False, random_state=30, verbose=0,\n",
89 | " warm_start=False)\n",
90 | "\n",
91 | "\n",
92 | "sclf = StackingCVClassifier(classifiers=[Ada, GBDT, LR,rf],\n",
93 | " use_probas=True,\n",
94 | " meta_classifier=svc,\n",
95 | " random_state=30)"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 63,
101 | "metadata": {},
102 | "outputs": [],
103 | "source": [
104 | "tranfer = StandardScaler()\n",
105 | "x_train = tranfer.fit_transform(x_train)  # fit on the (resampled) training split only, so test rows do not leak into the scaling statistics\n",
106 | "x_test = tranfer.transform(x_test)  # reuse the training-set mean/std for the held-out rows\n",
107 | "x = tranfer.transform(x)  # the single-model cell re-splits x, so keep it on the same scale"
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": 64,
113 | "metadata": {},
114 | "outputs": [
115 | {
116 | "data": {
117 | "text/plain": [
118 | "AdaBoostClassifier(algorithm='SAMME', learning_rate=0.1, n_estimators=100,\n",
119 | " random_state=30)"
120 | ]
121 | },
122 | "execution_count": 64,
123 | "metadata": {},
124 | "output_type": "execute_result"
125 | }
126 | ],
127 | "source": [
128 | "Ada"
129 | ]
130 | },
131 | {
132 | "cell_type": "code",
133 | "execution_count": 65,
134 | "metadata": {},
135 | "outputs": [
136 | {
137 | "data": {
138 | "text/html": [
139 | "
\n",
140 | "\n",
153 | "
\n",
154 | " \n",
155 | " \n",
156 | " | \n",
157 | " train_Accuracy | \n",
158 | " test_Accuracy | \n",
159 | " Precision | \n",
160 | " Recall | \n",
161 | " AUC | \n",
162 | " f1 | \n",
163 | "
\n",
164 | " \n",
165 | " \n",
166 | " \n",
167 | " LR | \n",
168 | " 0.867188 | \n",
169 | " 0.838710 | \n",
170 | " 0.400000 | \n",
171 | " 0.50 | \n",
172 | " 0.759259 | \n",
173 | " 0.444444 | \n",
174 | "
\n",
175 | " \n",
176 | " Ada | \n",
177 | " 0.953125 | \n",
178 | " 0.741935 | \n",
179 | " 0.333333 | \n",
180 | " 1.00 | \n",
181 | " 0.847222 | \n",
182 | " 0.500000 | \n",
183 | "
\n",
184 | " \n",
185 | " GBDT | \n",
186 | " 1.000000 | \n",
187 | " 0.806452 | \n",
188 | " 0.375000 | \n",
189 | " 0.75 | \n",
190 | " 0.907407 | \n",
191 | " 0.500000 | \n",
192 | "
\n",
193 | " \n",
194 | " svc | \n",
195 | " 1.000000 | \n",
196 | " 0.903226 | \n",
197 | " 1.000000 | \n",
198 | " 0.25 | \n",
199 | " 0.500000 | \n",
200 | " 0.400000 | \n",
201 | "
\n",
202 | " \n",
203 | " rf | \n",
204 | " 1.000000 | \n",
205 | " 0.838710 | \n",
206 | " 0.444444 | \n",
207 | " 1.00 | \n",
208 | " 0.953704 | \n",
209 | " 0.615385 | \n",
210 | "
\n",
211 | " \n",
212 | " LightGBM | \n",
213 | " 0.984375 | \n",
214 | " 0.774194 | \n",
215 | " 0.333333 | \n",
216 | " 0.75 | \n",
217 | " 0.916667 | \n",
218 | " 0.461538 | \n",
219 | "
\n",
220 | " \n",
221 | " StackingClassifier | \n",
222 | " 0.984375 | \n",
223 | " 0.967742 | \n",
224 | " 0.800000 | \n",
225 | " 1.00 | \n",
226 | " 0.981481 | \n",
227 | " 0.888889 | \n",
228 | "
\n",
229 | " \n",
230 | "
\n",
231 | "
"
232 | ],
233 | "text/plain": [
234 | " train_Accuracy test_Accuracy Precision Recall \\\n",
235 | "LR 0.867188 0.838710 0.400000 0.50 \n",
236 | "Ada 0.953125 0.741935 0.333333 1.00 \n",
237 | "GBDT 1.000000 0.806452 0.375000 0.75 \n",
238 | "svc 1.000000 0.903226 1.000000 0.25 \n",
239 | "rf 1.000000 0.838710 0.444444 1.00 \n",
240 | "LightGBM 0.984375 0.774194 0.333333 0.75 \n",
241 | "StackingClassifier 0.984375 0.967742 0.800000 1.00 \n",
242 | "\n",
243 | " AUC f1 \n",
244 | "LR 0.759259 0.444444 \n",
245 | "Ada 0.847222 0.500000 \n",
246 | "GBDT 0.907407 0.500000 \n",
247 | "svc 0.500000 0.400000 \n",
248 | "rf 0.953704 0.615385 \n",
249 | "LightGBM 0.916667 0.461538 \n",
250 | "StackingClassifier 0.981481 0.888889 "
251 | ]
252 | },
253 | "execution_count": 65,
254 | "metadata": {},
255 | "output_type": "execute_result"
256 | }
257 | ],
258 | "source": [
259 | "sclf = StackingCVClassifier(classifiers=[lgb, GBDT,rf],\n",
260 | " use_probas=True,\n",
261 | " meta_classifier=svc,\n",
262 | " random_state=30)\n",
263 | "\n",
264 | "zhibiao = {}\n",
265 | "\n",
266 | "# weight = []\n",
267 | "for clf, label in zip([LR, Ada, GBDT, svc, rf, lgb,sclf],\n",
268 | " ['LR',\n",
269 | " 'Ada',\n",
270 | " 'GBDT',\n",
271 | " 'svc',\n",
272 | " 'rf', 'LightGBM','StackingClassifier']):\n",
273 | " clf.fit(x_train, y_train)\n",
274 | " y_predict = clf.predict(x_test)\n",
275 | "# print('{}在预测集模型的准确率为:\\n'.format(label), metrics.accuracy_score(y_test, y_predict))\n",
276 | "# print('{}在训练集模型的准确率为:\\n'.format(label), metrics.accuracy_score(y_train, clf.predict(x_train)))\n",
277 | "# print('{}的综合准确率为:\\n'.format(label), metrics.accuracy_score(y, clf.predict(x)))\n",
278 | "# print('{}的Precision为:'.format(label), precision_score(y_test, y_predict))\n",
279 | "# print('{}的Recall为:'.format(label), recall_score(y_test, y_predict))\n",
280 | " tem = metrics.roc_auc_score(y_test, y_predict)\n",
281 | "# weight.append(tem)\n",
282 | "# print('{}的ROC面积为:'.format(label), metrics.roc_auc_score(y_test, clf.predict_proba(x_test)[:,1]))\n",
283 | "# print('{}的f1值为:'.format(label), metrics.f1_score(y_test, y_predict))\n",
284 | "# print()\n",
285 | " \n",
286 | " tem_1 = [metrics.accuracy_score(y_train, clf.predict(x_train)),metrics.accuracy_score(y_test, y_predict),\n",
287 | " precision_score(y_test, y_predict),recall_score(y_test, y_predict),\n",
288 | " metrics.roc_auc_score(y_test, clf.predict_proba(x_test)[:,1]),metrics.f1_score(y_test, y_predict)]\n",
289 | " zhibiao[label]=tem_1\n",
290 | "data2 = pd.DataFrame(data=zhibiao,index=['train_Accuracy','test_Accuracy',\n",
291 | " 'Precision','Recall','AUC','f1']).T\n",
292 | "data2"
293 | ]
294 | },
295 | {
296 | "cell_type": "code",
297 | "execution_count": 57,
298 | "metadata": {},
299 | "outputs": [
300 | {
301 | "data": {
302 | "text/html": [
303 | "\n",
304 | "\n",
317 | "
\n",
318 | " \n",
319 | " \n",
320 | " | \n",
321 | " train_Accuracy | \n",
322 | " test_Accuracy | \n",
323 | " Precision | \n",
324 | " Recall | \n",
325 | " AUC | \n",
326 | " f1 | \n",
327 | "
\n",
328 | " \n",
329 | " \n",
330 | " \n",
331 | " LightGBM | \n",
332 | " 0.956522 | \n",
333 | " 0.967742 | \n",
334 | " 0.8 | \n",
335 | " 1.0 | \n",
336 | " 0.805556 | \n",
337 | " 0.888889 | \n",
338 | "
\n",
339 | " \n",
340 | "
\n",
341 | "
"
342 | ],
343 | "text/plain": [
344 | " train_Accuracy test_Accuracy Precision Recall AUC f1\n",
345 | "LightGBM 0.956522 0.967742 0.8 1.0 0.805556 0.888889"
346 | ]
347 | },
348 | "execution_count": 57,
349 | "metadata": {},
350 | "output_type": "execute_result"
351 | }
352 | ],
353 | "source": [
354 | "# Single LightGBM model; is_unbalance=True handles the class imbalance here instead of resampling\n",
355 | "x_train,x_test,y_train,y_test=train_test_split(x,y,random_state=30)\n",
356 | "clf = LGBMClassifier(learning_rate=0.05,n_estimators=100,objective='binary',\n",
357 | "                     boosting_type='gbdt',num_leaves=2**5,\n",
358 | "                     max_depth=5,reg_alpha=0.5,reg_lambda=0.5,is_unbalance=True,\n",
359 | "                     metric='auc',subsample=0.75)\n",
360 | "clf.fit(x_train,y_train)\n",
361 | "y_predict = clf.predict(x_test)  # score this model's own predictions, not the y_predict left over from the stacking loop\n",
362 | "pd.DataFrame(data=[metrics.accuracy_score(y_train, clf.predict(x_train)),metrics.accuracy_score(y_test, y_predict),\n",
363 | "                   precision_score(y_test, y_predict),recall_score(y_test, y_predict),\n",
364 | "                   metrics.roc_auc_score(y_test, clf.predict_proba(x_test)[:,1]),metrics.f1_score(y_test, y_predict)],index=['train_Accuracy','test_Accuracy',\n",
365 | "                   'Precision','Recall','AUC','f1'],columns=['LightGBM']).T"
366 | ]
367 | }
368 | ],
369 | "metadata": {
370 | "interpreter": {
371 | "hash": "07efdcd4b820c98a756949507a4d29d7862823915ec7477944641bea022f4f62"
372 | },
373 | "kernelspec": {
374 | "display_name": "Python 3.8.8 ('base')",
375 | "language": "python",
376 | "name": "python3"
377 | },
378 | "language_info": {
379 | "codemirror_mode": {
380 | "name": "ipython",
381 | "version": 3
382 | },
383 | "file_extension": ".py",
384 | "mimetype": "text/x-python",
385 | "name": "python",
386 | "nbconvert_exporter": "python",
387 | "pygments_lexer": "ipython3",
388 | "version": "3.8.8"
389 | },
390 | "orig_nbformat": 4
391 | },
392 | "nbformat": 4,
393 | "nbformat_minor": 2
394 | }
395 |
--------------------------------------------------------------------------------
/第一问信用评级的描述性统计.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import numpy as np"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 3,
16 | "metadata": {},
17 | "outputs": [
18 | {
19 | "data": {
20 | "text/html": [
21 | "\n",
22 | "\n",
35 | "
\n",
36 | " \n",
37 | " \n",
38 | " | \n",
39 | " 违约概率 | \n",
40 | " 信誉评级 | \n",
41 | "
\n",
42 | " \n",
43 | " \n",
44 | " \n",
45 | " 0 | \n",
46 | " 0.144146 | \n",
47 | " A | \n",
48 | "
\n",
49 | " \n",
50 | " 1 | \n",
51 | " 0.109683 | \n",
52 | " A | \n",
53 | "
\n",
54 | " \n",
55 | " 2 | \n",
56 | " 0.087636 | \n",
57 | " C | \n",
58 | "
\n",
59 | " \n",
60 | " 3 | \n",
61 | " 0.100442 | \n",
62 | " C | \n",
63 | "
\n",
64 | " \n",
65 | " 4 | \n",
66 | " 0.132734 | \n",
67 | " B | \n",
68 | "
\n",
69 | " \n",
70 | " ... | \n",
71 | " ... | \n",
72 | " ... | \n",
73 | "
\n",
74 | " \n",
75 | " 118 | \n",
76 | " 0.690629 | \n",
77 | " D | \n",
78 | "
\n",
79 | " \n",
80 | " 119 | \n",
81 | " 0.863239 | \n",
82 | " D | \n",
83 | "
\n",
84 | " \n",
85 | " 120 | \n",
86 | " 0.669896 | \n",
87 | " D | \n",
88 | "
\n",
89 | " \n",
90 | " 121 | \n",
91 | " 0.813119 | \n",
92 | " D | \n",
93 | "
\n",
94 | " \n",
95 | " 122 | \n",
96 | " 0.858223 | \n",
97 | " D | \n",
98 | "
\n",
99 | " \n",
100 | "
\n",
101 | "
123 rows × 2 columns
\n",
102 | "
"
103 | ],
104 | "text/plain": [
105 | " 违约概率 信誉评级\n",
106 | "0 0.144146 A\n",
107 | "1 0.109683 A\n",
108 | "2 0.087636 C\n",
109 | "3 0.100442 C\n",
110 | "4 0.132734 B\n",
111 | ".. ... ...\n",
112 | "118 0.690629 D\n",
113 | "119 0.863239 D\n",
114 | "120 0.669896 D\n",
115 | "121 0.813119 D\n",
116 | "122 0.858223 D\n",
117 | "\n",
118 | "[123 rows x 2 columns]"
119 | ]
120 | },
121 | "execution_count": 3,
122 | "metadata": {},
123 | "output_type": "execute_result"
124 | }
125 | ],
126 | "source": [
127 | "data = pd.read_csv('违约风险.csv',encoding='gbk')\n",
128 | "data"
129 | ]
130 | },
131 | {
132 | "cell_type": "code",
133 | "execution_count": 17,
134 | "metadata": {},
135 | "outputs": [],
136 | "source": [
137 | "A_max = data[data['信誉评级']=='A']['违约概率'].max()\n",
138 | "A_min = data[data['信誉评级']=='A']['违约概率'].min()\n",
139 | "\n",
140 | "B_max = data[data['信誉评级']=='B']['违约概率'].max()\n",
141 | "B_min = data[data['信誉评级']=='B']['违约概率'].min()\n",
142 | "\n",
143 | "C_max = data[data['信誉评级']=='C']['违约概率'].max()\n",
144 | "C_min = data[data['信誉评级']=='C']['违约概率'].min()\n",
145 | "\n",
146 | "D_max = data[data['信誉评级']=='D']['违约概率'].max()\n",
147 | "D_min = data[data['信誉评级']=='D']['违约概率'].min()"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": 18,
153 | "metadata": {},
154 | "outputs": [
155 | {
156 | "data": {
157 | "text/html": [
158 | "\n",
159 | "\n",
172 | "
\n",
173 | " \n",
174 | " \n",
175 | " | \n",
176 | " A | \n",
177 | " B | \n",
178 | " C | \n",
179 | " D | \n",
180 | "
\n",
181 | " \n",
182 | " \n",
183 | " \n",
184 | " Max | \n",
185 | " 0.434712 | \n",
186 | " 0.756349 | \n",
187 | " 0.638270 | \n",
188 | " 0.882178 | \n",
189 | "
\n",
190 | " \n",
191 | " Min | \n",
192 | " 0.078757 | \n",
193 | " 0.100952 | \n",
194 | " 0.087636 | \n",
195 | " 0.237270 | \n",
196 | "
\n",
197 | " \n",
198 | "
\n",
199 | "
"
200 | ],
201 | "text/plain": [
202 | " A B C D\n",
203 | "Max 0.434712 0.756349 0.638270 0.882178\n",
204 | "Min 0.078757 0.100952 0.087636 0.237270"
205 | ]
206 | },
207 | "execution_count": 18,
208 | "metadata": {},
209 | "output_type": "execute_result"
210 | }
211 | ],
212 | "source": [
213 | "dd = {'A':[A_max,A_min],\n",
214 | " 'B':[B_max,B_min],\n",
215 | " 'C':[C_max,C_min],\n",
216 | " 'D':[D_max,D_min]}\n",
217 | "\n",
218 | "d1 = pd.DataFrame(data=dd,index=['Max','Min'])\n",
219 | "d1"
220 | ]
221 | },
222 | {
223 | "cell_type": "code",
224 | "execution_count": 27,
225 | "metadata": {},
226 | "outputs": [
227 | {
228 | "data": {
229 | "text/html": [
230 | "\n",
231 | "\n",
244 | "
\n",
245 | " \n",
246 | " \n",
247 | " | \n",
248 | " A | \n",
249 | " B | \n",
250 | " C | \n",
251 | " D | \n",
252 | "
\n",
253 | " \n",
254 | " \n",
255 | " \n",
256 | " count | \n",
257 | " 27.000000 | \n",
258 | " 38.000000 | \n",
259 | " 34.000000 | \n",
260 | " 24.000000 | \n",
261 | "
\n",
262 | " \n",
263 | " mean | \n",
264 | " 0.145062 | \n",
265 | " 0.181696 | \n",
266 | " 0.208571 | \n",
267 | " 0.709669 | \n",
268 | "
\n",
269 | " \n",
270 | " std | \n",
271 | " 0.067785 | \n",
272 | " 0.107809 | \n",
273 | " 0.146612 | \n",
274 | " 0.169678 | \n",
275 | "
\n",
276 | " \n",
277 | " min | \n",
278 | " 0.078757 | \n",
279 | " 0.100952 | \n",
280 | " 0.087636 | \n",
281 | " 0.237270 | \n",
282 | "
\n",
283 | " \n",
284 | " 25% | \n",
285 | " 0.110293 | \n",
286 | " 0.137316 | \n",
287 | " 0.121182 | \n",
288 | " 0.651753 | \n",
289 | "
\n",
290 | " \n",
291 | " 50% | \n",
292 | " 0.128696 | \n",
293 | " 0.161150 | \n",
294 | " 0.156689 | \n",
295 | " 0.780061 | \n",
296 | "
\n",
297 | " \n",
298 | " 75% | \n",
299 | " 0.162311 | \n",
300 | " 0.191359 | \n",
301 | " 0.198689 | \n",
302 | " 0.814742 | \n",
303 | "
\n",
304 | " \n",
305 | " max | \n",
306 | " 0.434712 | \n",
307 | " 0.756349 | \n",
308 | " 0.638270 | \n",
309 | " 0.882178 | \n",
310 | "
\n",
311 | " \n",
312 | "
\n",
313 | "
"
314 | ],
315 | "text/plain": [
316 | " A B C D\n",
317 | "count 27.000000 38.000000 34.000000 24.000000\n",
318 | "mean 0.145062 0.181696 0.208571 0.709669\n",
319 | "std 0.067785 0.107809 0.146612 0.169678\n",
320 | "min 0.078757 0.100952 0.087636 0.237270\n",
321 | "25% 0.110293 0.137316 0.121182 0.651753\n",
322 | "50% 0.128696 0.161150 0.156689 0.780061\n",
323 | "75% 0.162311 0.191359 0.198689 0.814742\n",
324 | "max 0.434712 0.756349 0.638270 0.882178"
325 | ]
326 | },
327 | "execution_count": 27,
328 | "metadata": {},
329 | "output_type": "execute_result"
330 | }
331 | ],
332 | "source": [
333 | "A_des = data[data['信誉评级']=='A']['违约概率'].describe()\n",
334 | "B_des = data[data['信誉评级']=='B']['违约概率'].describe()\n",
335 | "C_des = data[data['信誉评级']=='C']['违约概率'].describe()\n",
336 | "D_des = data[data['信誉评级']=='D']['违约概率'].describe()\n",
337 | "\n",
338 | "dd2 = {'A':A_des,'B':B_des,'C':C_des,'D':D_des}\n",
339 | "d2 = pd.DataFrame(data=dd2)\n",
340 | "d2"
341 | ]
342 | },
343 | {
344 | "cell_type": "code",
345 | "execution_count": 29,
346 | "metadata": {},
347 | "outputs": [],
348 | "source": [
349 | "d2.to_csv('各类信誉评级的违约风险的描述性统计分析.csv',encoding='gbk')"
350 | ]
351 | },
352 | {
353 | "cell_type": "code",
354 | "execution_count": null,
355 | "metadata": {},
356 | "outputs": [],
357 | "source": []
358 | },
359 | {
360 | "cell_type": "code",
361 | "execution_count": null,
362 | "metadata": {},
363 | "outputs": [],
364 | "source": []
365 | },
366 | {
367 | "cell_type": "code",
368 | "execution_count": null,
369 | "metadata": {},
370 | "outputs": [],
371 | "source": []
372 | }
373 | ],
374 | "metadata": {
375 | "kernelspec": {
376 | "display_name": "Python [conda env:root] *",
377 | "language": "python",
378 | "name": "conda-root-py"
379 | },
380 | "language_info": {
381 | "codemirror_mode": {
382 | "name": "ipython",
383 | "version": 3
384 | },
385 | "file_extension": ".py",
386 | "mimetype": "text/x-python",
387 | "name": "python",
388 | "nbconvert_exporter": "python",
389 | "pygments_lexer": "ipython3",
390 | "version": "3.7.6"
391 | },
392 | "toc": {
393 | "base_numbering": 1,
394 | "nav_menu": {},
395 | "number_sections": true,
396 | "sideBar": true,
397 | "skip_h1_title": false,
398 | "title_cell": "Table of Contents",
399 | "title_sidebar": "Contents",
400 | "toc_cell": false,
401 | "toc_position": {},
402 | "toc_section_display": true,
403 | "toc_window_display": false
404 | }
405 | },
406 | "nbformat": 4,
407 | "nbformat_minor": 4
408 | }
409 |
--------------------------------------------------------------------------------
/第一问所有特征.csv:
--------------------------------------------------------------------------------
1 | 企业代号,销-进金额,增值税,销1,销2,销3,销4,进1,进2,进3,进4,进项发票的作废比例,销项发票的作废比例,绝对数变化,比例变化,是否扭亏为盈利,是否变为亏损,下属部门,分公司 ,公司,个体经营,评级,信誉评级,是否违约
2 | E1,-1898184082,0,179,51,51,71,332,74,27,3,0.055797733,0.027620222,-193872136,1.720613861,0,0,0,0,1,0,1,A,否
3 | E2,426297552.3,28261615.82,1147,268,121,43,2424,780,314,103,0.022421943,0.082002046,110302536.5,2.197674176,0,0,0,0,1,0,1,A,否
4 | E3,514987171,88562269.38,48,48,25,14,386,103,58,23,0.042534532,0.015993021,-115477286.1,0.516353348,0,0,0,1,0,0,3,C,否
5 | E4,1511803093,254330043.4,16,10,0,0,94,21,13,2,0.066308244,0.085163604,-1168509293,0.053642121,0,0,0,0,1,0,3,C,否
6 | E5,691527.38,548958.88,16,15,2,0,324,47,19,2,0.039188566,0.051886792,4068217.15,85.22708692,0,0,0,0,1,0,2,B,否
7 | E6,45673584.07,9086998.62,85,38,18,1,1170,235,96,13,0.046132134,0.132129278,14520912.32,6.220033757,0,0,0,0,1,0,1,A,否
8 | E7,450248079.4,79329020.55,1004,101,103,3,1043,392,126,10,0.034664427,0.01435759,-170352223.8,0.282999939,0,0,0,1,0,0,1,A,否
9 | E8,186366411.1,13359969.4,2051,428,169,31,1758,547,216,72,0.033679446,0.113103448,10175963.7,1.190343495,0,0,0,0,1,0,1,A,否
10 | E9,294700968.1,48008699.68,26,28,24,45,497,138,72,26,0.02189611,0.024720623,8701373.76,1.102641628,0,0,0,1,0,0,1,A,否
11 | E10,334674157.2,10308763.71,14,2,7,3,707,122,51,0,0.057847205,0.091549296,82858784.61,5.695238829,0,0,0,0,1,0,2,B,否
12 | E11,5011729.91,2085745.11,1,1,2,0,353,96,13,3,0.042665726,0.05908684,-38346246.26,0.062246969,0,0,0,0,1,0,3,C,否
13 | E12,119554241,12226064.87,1,3,1,0,298,77,16,2,0.021714922,0.077192982,-84557485.89,0.314231061,0,0,0,0,1,0,2,B,否
14 | E13,114691249.5,19162604.52,2699,342,111,25,456,150,73,43,0.015888336,0.149531543,17290905.08,1.624659042,0,0,0,0,1,0,1,A,否
15 | E14,86038310.36,17351210.64,499,176,145,0,609,170,126,0,0.077716547,0.071642686,8017782.6,1.260860968,0,0,0,0,0,1,3,C,否
16 | E15,207585880.2,7211027.52,0,0,1,1,27,5,1,0,0,0.063018242,28666792.4,2.030952353,0,0,0,0,1,0,1,A,否
17 | E16,209132769.2,4130668.31,34,9,8,2,63,11,3,0,0.003448276,0.111617312,39872630.5,4.427526479,0,0,0,0,1,0,1,A,否
18 | E17,12371632.79,0,136,37,6,3,633,165,67,4,0.056157505,0.170118343,10825143.39,4.609111121,0,0,0,0,1,0,1,A,否
19 | E18,50065149.39,0,32,5,8,1,507,94,38,2,0.053772767,0.094488189,17393819.63,25.37000438,0,0,0,0,1,0,1,A,否
20 | E19,-10663999.95,0,359,105,32,12,112,35,15,5,0.026246719,0.089706357,457386.04,1.295208096,0,0,0,0,1,0,1,A,否
21 | E20,-2938233.73,871931.22,20,1,3,1,51,10,8,1,0.078761062,0.051480051,4408596.54,3.448482477,0,0,0,0,1,0,2,B,否
22 | E21,-11541727.45,2010367.05,312,13,0,0,398,67,13,2,0.071799308,0.025920873,9020452.64,-107.8554226,1,0,0,0,1,0,2,B,否
23 | E22,39408223.26,1007767.46,29,12,3,1,54,17,8,1,0.055900621,0.125480154,34495523.8,-68.36411556,1,0,0,0,1,0,1,A,否
24 | E23,7810606.37,1343556.27,58,27,10,23,463,150,56,13,0.03009493,0.022969188,-6130097.84,-3.152684243,0,1,0,0,1,0,2,B,否
25 | E24,63461390.07,2581218.68,25,17,4,2,272,73,18,4,0.021006351,0.080808081,21065382.97,3.750814974,0,0,0,0,1,0,1,A,否
26 | E25,28738558.94,4790430.02,182,37,13,1,267,76,20,4,0.013475177,0.167689162,3950781.03,1.871990389,0,0,0,0,1,0,3,C,否
27 | E26,-1938217.2,0,29,12,1,0,80,23,9,5,0.041719343,0.107142857,801968.45,-1.535171354,1,0,0,0,1,0,1,A,否
28 | E27,-2964169.13,0,118,36,13,10,307,66,21,9,0.035087719,0.053814714,340587.89,-4.576462138,1,0,0,0,1,0,1,A,否
29 | E28,47653987.22,4585203.06,1,1,2,1,46,14,1,0,0.076923077,0.077328647,19643668.89,7.404078616,0,0,0,0,1,0,2,B,否
30 | E29,45010590.57,1778869.77,1,1,1,0,15,1,1,0,0.04,0.012631579,11138949.26,1.85630192,0,0,0,0,1,0,3,C,是
31 | E30,49372586.08,3403861.37,3,3,1,0,145,17,7,0,0.018567639,0.06625,2738199.31,1.234802735,0,0,0,0,1,0,2,B,否
32 | E31,42826768.62,6338508.75,64,28,37,22,87,10,5,5,0.019417476,0.041027607,7131326.48,1.679119834,0,0,0,0,1,0,1,A,否
33 | E32,41734977.49,1132413.07,9,0,1,0,207,49,16,0,0.005307856,0.152173913,11818843.08,2.063600841,0,0,0,0,1,0,2,B,否
34 | E33,-13685226.15,3473876.56,61,16,7,0,186,42,9,3,0.123743233,0.150046598,-1247606.58,0.512545099,0,0,0,0,1,0,2,B,否
35 | E34,30811458.94,1004529,56,6,2,0,379,56,12,0,0.103074924,0.157001414,-1442409.79,140.7276366,0,0,0,0,1,0,2,B,否
36 | E35,1307511.65,0,20,5,4,2,57,29,13,2,0.053811659,0.042857143,4954392.81,-11.28383944,1,0,0,0,1,0,2,B,否
37 | E36,14510309.69,1949388.05,87,60,51,0,319,62,36,0,0.045232274,0.076276665,5859723.15,3.214112585,0,0,0,0,1,0,4,D,是
38 | E37,-1828933.02,0,219,44,12,1,55,16,7,1,0.042071197,0.06991359,20100.51,0.945329365,0,0,0,0,1,0,2,B,否
39 | E38,33531866.64,617679.99,5,2,1,0,191,40,5,0,0.033299697,0.121019108,3241096.45,1.331340181,0,0,0,0,1,0,2,B,否
40 | E39,28820097.95,3723255.27,1,0,1,0,44,9,1,0,0.010638298,0.106060606,13935775.47,7.45272558,0,0,0,0,1,0,3,C,否
41 | E40,29114217.69,546512.99,452,31,0,0,641,111,29,13,0.025412961,0.093997735,13206723.03,6.115626836,0,0,0,0,1,0,3,C,否
42 | E41,26983203.31,460999.03,5,3,3,2,369,86,23,8,0.031972455,0.087481146,12254350.03,4.989140651,0,0,0,0,1,0,3,C,否
43 | E42,26225027.98,0,23,5,0,0,6,1,0,0,0,0.100917431,12910420.96,12.13534949,0,0,0,0,1,0,1,A,否
44 | E43,14107614.55,711302.84,3,2,0,0,15,4,0,0,0.128205128,0.126530612,1390239.32,1.182075685,0,0,0,0,1,0,2,B,否
45 | E44,2524250.34,326537.44,13,12,3,2,137,52,20,2,0.020833333,0.07926078,899638.66,6.562504224,0,0,0,0,1,0,3,C,否
46 | E45,459652.75,301217.76,55,8,0,0,92,19,3,0,0.013333333,0.061538462,-697757.92,-5.818097757,0,1,0,0,0,1,2,B,是
47 | E46,6666931.75,305058.31,104,27,8,1,342,69,21,3,0.020440252,0.087268994,1283506.09,12.06990003,0,0,0,0,1,0,3,C,否
48 | E47,5743256.84,1001210.12,414,161,106,79,234,66,46,28,0.032358003,0.046048891,1285307.17,2.966145392,0,0,0,0,1,0,3,C,否
49 | E48,37027025.99,5451628.06,35,13,5,6,164,43,21,3,0.020242915,0.05511811,17195806.14,5.250937641,0,0,0,0,1,0,1,A,否
50 | E49,9628430.92,956642.35,244,4,0,0,551,130,28,4,0.026832642,0.101364522,-4707428.38,0.389280788,0,0,0,0,1,0,3,C,否
51 | E50,16419820.69,709422.73,4,1,0,0,54,11,1,0,0.055837563,0.105263158,6781297.75,2.710812996,0,0,0,0,1,0,3,C,否
52 | E51,7169019.19,532024.51,87,22,15,5,45,8,6,2,0.004415011,0.068156425,5553169.22,149.5400353,0,0,0,0,1,0,2,B,否
53 | E52,1785006.11,266081.32,8,2,1,0,108,20,3,0,0.037647059,0.245382586,1095999.81,2.928640925,0,0,0,0,1,0,4,D,是
54 | E53,1234243.06,0,33,6,4,0,48,13,1,0,0.049206349,0.127753304,244683.34,1.233460399,0,0,0,0,1,0,3,C,否
55 | E54,24868467.73,3137410.18,700,187,212,11,347,76,28,6,0.054136253,0.081690945,3650147.27,1.559499811,0,0,0,0,1,0,1,A,否
56 | E55,11859438.55,1377481.76,18,2,2,0,96,32,6,3,0.086269745,0.249152542,-3163800.21,0.461382188,0,0,0,1,0,0,3,C,否
57 | E56,-1408942.97,0,21,6,6,4,114,34,9,5,0.040799334,0.074747475,590087.43,0.608077187,0,0,0,0,1,0,3,C,否
58 | E57,2973362.72,671931.59,18,6,1,0,32,8,1,0,0.061538462,0.118721461,-2307020.6,-520.3782614,0,1,0,0,1,0,2,B,否
59 | E58,14939854.55,1044924.28,18,2,2,0,284,52,18,0,0.028846154,0.268041237,4623582.12,2.631909484,0,0,0,0,1,0,2,B,否
60 | E59,7399026.38,1165767.6,131,34,15,5,52,17,5,2,0.030965392,0.038157282,2076239.55,2.668089467,0,0,0,0,1,0,1,A,否
61 | E60,3153536.31,0,3,4,1,0,102,24,2,0,0.014869888,0.173489279,2762867.14,16.64818925,0,0,0,0,1,0,2,B,否
62 | E61,16040129.65,1704753.35,47,8,9,1,28,8,6,2,0.014150943,0.018574297,-1426691.21,0.75169374,0,0,0,0,1,0,2,B,否
63 | E62,8262640.47,497880.69,20,4,2,1,108,27,12,1,0.005660377,0.127853881,-1325522.5,0.640706858,0,0,0,1,0,0,2,B,否
64 | E63,10859983.19,3946576.94,29,11,2,0,237,34,19,2,0.029879212,0.071322437,-7595933.39,0.163752575,0,0,0,0,1,0,2,B,否
65 | E64,8798256.56,55237.46,131,20,10,1,17,2,2,0,0,0.066909091,412503.15,1.153526918,0,0,0,0,1,0,1,A,否
66 | E65,2726684.71,237765.7,36,14,4,4,21,12,5,0,0.01362862,0.06185567,745267.07,3.181304821,0,0,0,0,1,0,2,B,否
67 | E66,-375084.44,0,113,25,12,1,19,5,4,2,0.003466205,0.075110457,1573358.29,163.8030821,0,0,0,0,1,0,2,B,否
68 | E67,4315445.45,99554,18,3,2,0,157,25,12,1,0.030911901,0.106976744,2189387.65,5.055198896,0,0,0,0,1,0,2,B,否
69 | E68,5866711.11,89627.09,5,2,0,0,1,1,0,0,0,0.109589041,-2731449.09,0.364643359,0,0,0,0,1,0,3,C,否
70 | E69,3636062.74,109081.89,12,4,2,0,1,0,0,0,0,0.109028961,2661870.58,9.974805767,0,0,1,0,0,0,3,C,否
71 | E70,3595798.73,580622.56,39,8,5,3,138,29,13,7,0.022963368,0.188581315,-2036647.49,0.059261052,0,0,0,0,1,0,2,B,否
72 | E71,417041.18,0,260,18,4,0,214,48,19,0,0.042891183,0.042666667,5842485.55,-0.270946377,1,0,0,0,1,0,2,B,否
73 | E72,2943665.95,0,25,3,0,0,41,9,3,0,0.025641026,0.290322581,1453197.42,-13.39267755,1,0,0,0,1,0,3,C,否
74 | E73,5350093.41,909143.31,48,7,2,1,84,21,8,5,0.00990099,0.055084746,-1344863.84,0.494727487,0,0,0,0,1,0,3,C,否
75 | E74,3649790.04,0,8,6,3,2,7,1,0,0,0,0.077477477,1388223.66,6.797415479,0,0,0,0,1,0,2,B,否
76 | E75,3859608.51,26412.04,7451,9,2,1,34,12,4,3,0.025362319,0.068284229,-1700637.8,0.219843181,0,0,0,0,1,0,3,C,否
77 | E76,3156443.92,203043.27,58,4,9,2,78,16,7,1,0.011428571,0.089795918,940249.39,2.752940948,0,0,0,0,1,0,2,B,否
78 | E77,3607798.82,159249.17,0,1,1,0,13,2,0,1,0.018518519,0.023255814,-759119.19,0.402139273,0,0,0,0,1,0,3,C,否
79 | E78,6029427.49,260670.99,30,5,2,0,11,4,1,0,0,0.074626866,-2691870.88,0.187109816,0,0,0,0,0,1,3,C,否
80 | E79,363491.02,0,9,4,1,1,30,11,3,2,0.012987013,0.139534884,1034964.7,-1.507861627,1,0,0,0,1,0,2,B,否
81 | E80,1148651.19,27552.75,38,0,0,0,82,15,6,0,0.012658228,0.020833333,3297244.36,-5.026949012,1,0,0,0,1,0,3,C,否
82 | E81,1330514.4,221435.23,70,30,17,2,49,13,5,1,0.028436019,0.061633282,182894.31,1.580709088,0,0,0,0,1,0,1,A,否
83 | E82,1385376.45,238891.41,7,0,1,0,41,17,10,0,0.012698413,0.023323615,118211.17,1.384650542,0,0,0,0,1,0,4,D,是
84 | E83,-34876613.63,0,130,30,0,0,162,52,37,8,0.066089965,0.080736544,-11859813.95,3.545184778,0,0,0,0,1,0,2,B,否
85 | E84,3392431.19,521610.51,24,7,4,0,16,1,1,1,0.018518519,0.106425703,641060.92,1.908647944,0,0,0,0,1,0,1,A,否
86 | E85,2247393.78,55871.8,14,6,1,2,25,6,6,1,0.004366812,0.156996587,509292.93,3.370016672,0,0,0,0,1,0,2,B,否
87 | E86,411411.22,54856.36,3,4,0,0,29,6,1,0,0.007246377,0.136363636,-606338.28,0.224465111,0,0,0,1,0,0,3,C,否
88 | E87,2288881.09,240920.4,5,4,3,1,62,17,2,1,0.021108179,0.040540541,674008.58,3.983327704,0,0,0,0,1,0,3,C,是
89 | E88,1877583.81,160137.38,78,8,0,0,17,7,1,1,0,0.09,181422.33,1.393914056,0,0,0,0,1,0,1,A,否
90 | E89,-824452.26,159405.57,16,4,4,0,6,5,1,0,0.030769231,0.040697674,2630392.22,-0.244160693,1,0,0,0,1,0,1,A,否
91 | E90,2488785.48,167307.28,38,8,3,0,29,6,0,0,0.028776978,0.283524904,426767.62,1.71430123,0,0,0,0,1,0,3,C,否
92 | E91,930397.99,148985.33,65,7,1,0,20,8,6,1,0.050847458,0.046511628,267744.88,2.254806928,0,0,0,0,1,0,1,A,否
93 | E92,2060494.4,55579.19,11,2,0,0,73,12,4,0,0.004694836,0.076923077,1226750.25,4.818772443,0,0,0,0,1,0,3,C,否
94 | E93,1158753.81,26196.36,19,5,4,0,53,8,4,0,0.023474178,0.047297297,190487.11,1.773236926,0,0,0,0,1,0,2,B,否
95 | E94,1039150.21,29299.68,54,2,0,0,4,0,0,0,0,0.091836735,296547.2,2.761899454,0,0,0,0,1,0,3,C,否
96 | E95,2309683.68,69258.77,446,32,7,0,0,0,1,0,0,0.053669222,-173150.47,0.762139235,0,0,0,0,1,0,2,B,否
97 | E96,-880797.48,0,3,0,0,0,4,2,0,0,0,0.1,1348697.76,33.85250329,0,0,0,0,1,0,3,C,否
98 | E97,952646.12,28016.78,44,17,2,0,1,0,1,0,0,0.034090909,428086.28,2.842571436,0,0,1,0,0,0,2,B,否
99 | E98,1203728.22,31893.6,20,3,2,0,25,1,1,0,0.019607843,0.130718954,-106852.63,0.805678733,0,0,0,0,1,0,2,B,否
100 | E99,-5203665.22,0,1,1,0,0,73,7,1,0,0.010227273,0.375,-1114717.37,-1.215629991,0,1,0,0,1,0,4,D,是
101 | E100,650279.93,17886.26,9,2,1,1,15,5,1,0,0,0.01010101,-111912.9,0.641697941,0,0,0,0,1,0,4,D,是
102 | E101,294643.81,44576.19,2,0,0,0,4,2,0,0,0,0.492063492,-119859.57,0.421671447,0,0,0,0,1,0,4,D,是
103 | E102,-2841555.25,0,91,7,2,0,28,21,12,9,0.02739726,0.146892655,43268.35,0.951715913,0,0,0,0,1,0,4,D,是
104 | E103,990178.47,62712.09,16,3,0,0,56,7,2,0,0.01744186,0.15625,49319.54,1.085415704,0,0,0,0,1,0,4,D,是
105 | E104,262386.36,7863.64,10,0,0,0,1,0,0,0,0,0.095238095,103233.01,2.299433945,0,0,0,0,1,0,3,C,否
106 | E105,917382.45,26701.44,101,0,0,0,2,2,0,0,0,0.008196721,-134853.56,0.572421947,0,0,1,0,0,0,3,C,否
107 | E106,526975.73,15241.03,95,6,2,0,15,4,0,0,0.027777778,0.098039216,139940.23,2.157468663,0,0,0,0,1,0,2,B,否
108 | E107,682155.01,40934.99,2,0,1,0,5,2,0,0,0,0.37037037,600297.72,10.30893452,0,0,0,0,1,0,4,D,是
109 | E108,92174.11,0,18,1,0,0,51,18,1,0,0,0,-123849.11,0.035964429,0,0,0,0,1,0,4,D,是
110 | E109,439509.33,13926.67,16,1,0,0,5,2,0,0,0,0.303030303,274393.5,7.229384822,0,0,0,0,1,0,4,D,是
111 | E110,195956.47,5831.04,29,5,0,0,0,0,1,0,0,0.253012048,21642.65,1.78831463,0,0,1,0,0,0,3,C,否
112 | E111,124997.02,100848.16,5,0,1,3,74,15,4,2,0.050359712,0.184,-271144.93,-2.37302917,0,1,0,0,1,0,4,D,是
113 | E112,86946.74,19202.37,1,1,0,0,17,6,2,0,0.037735849,0.272727273,208206.57,2.69992051,0,0,0,0,1,0,4,D,是
114 | E113,-1006740.82,0,2,1,0,0,26,8,1,0,0.019230769,0.340425532,-82420.22,-0.038912999,0,1,0,0,1,0,4,D,是
115 | E114,-397912.31,0,19,1,3,0,15,4,1,0,0.092105263,0.12,-356779.9,-5.032003203,0,1,0,0,1,0,4,D,是
116 | E115,55005.68,1634.32,3,0,0,0,2,0,0,0,0,0.333333333,35728.16,4.706717462,0,0,0,0,1,0,4,D,是
117 | E116,228821.67,6742.4,24,1,0,0,8,0,0,0,0,0.085106383,-75814.26,0.066151003,0,0,0,0,1,0,4,D,是
118 | E117,516642.52,1405.48,0,1,0,0,3,0,0,0,0,0.409090909,211072.46,2.381499614,0,0,0,0,1,0,4,D,是
119 | E118,120774.03,0,111,5,0,0,7,2,0,0,0.068965517,0.055555556,-141118.15,-0.339462906,0,1,0,0,1,0,4,D,是
120 | E119,-184120.28,0,12,1,1,0,7,6,2,2,0.003174603,0.142857143,-68619.22,6.999116995,0,0,0,0,1,0,4,D,是
121 | E120,162416.14,3780.94,6,0,0,0,13,3,0,0,0.027777778,0.689655172,8830.95,1.139033405,0,0,0,0,1,0,4,D,是
122 | E121,-972024.16,0,110,6,0,0,7,2,2,0,0,0.123655914,690652.18,-0.075121715,1,0,0,0,1,0,4,D,是
123 | E122,7342.16,0,67,2,0,0,18,3,1,0,0.020833333,0.13559322,-27347.29,-0.681495628,0,1,0,0,1,0,4,D,是
124 | E123,199443.4,27276.3,3,1,1,0,1,1,0,0,0,0.492307692,17453.84,1.824360564,0,0,0,0,1,0,4,D,是
125 |
--------------------------------------------------------------------------------
/第一问:作废发票计算.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 146,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import numpy as np\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "from datetime import datetime\n",
13 | "import time"
14 | ]
15 | },
16 | {
17 | "cell_type": "markdown",
18 | "metadata": {},
19 | "source": [
20 | "# 进销口分析"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 2,
26 | "metadata": {},
27 | "outputs": [],
28 | "source": [
29 | "input1 = pd.read_excel('附件1.xlsx',sheet_name='进项发票信息')\n",
30 | "output1 = pd.read_excel('附件1.xlsx',sheet_name='销项发票信息')"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": 22,
36 | "metadata": {},
37 | "outputs": [],
38 | "source": [
39 | "for num in range(1,124):\n",
40 | " id1 = 'E'+str(num)\n",
41 | " input1[input1['企业代号']==id1]"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 41,
47 | "metadata": {},
48 | "outputs": [
49 | {
50 | "data": {
51 | "text/plain": [
52 | "0.05579773321708806"
53 | ]
54 | },
55 | "execution_count": 41,
56 | "metadata": {},
57 | "output_type": "execute_result"
58 | }
59 | ],
60 | "source": [
61 | "num = 1\n",
62 | "id1 = 'E'+str(num)\n",
63 | "tem = input1[input1['企业代号']==id1]\n",
64 | "all_num = tem.shape[0]\n",
65 | "fei_num = tem[tem['发票状态']=='作废发票'].shape[0]\n",
66 | "ratio = fei_num/all_num\n",
67 | "ratio"
68 | ]
69 | },
70 | {
71 | "cell_type": "code",
72 | "execution_count": 47,
73 | "metadata": {},
74 | "outputs": [],
75 | "source": [
76 | "r_in=[]\n",
77 | "\n",
78 | "for num in range(1,124):\n",
79 | " id1 = 'E'+str(num)\n",
80 | " tem = input1[input1['企业代号']==id1]\n",
81 | " all_num = tem.shape[0]\n",
82 | " fei_num = tem[tem['发票状态']=='作废发票'].shape[0]\n",
83 | " ratio = fei_num/all_num\n",
84 | " \n",
85 | " r_in.append(ratio)\n",
86 | " \n",
87 | "r_out=[]\n",
88 | "\n",
89 | "for num in range(1,124):\n",
90 | " id1 = 'E'+str(num)\n",
91 | " tem = output1[output1['企业代号']==id1]\n",
92 | " all_num = tem.shape[0]\n",
93 | " fei_num = tem[tem['发票状态']=='作废发票'].shape[0]\n",
94 | " ratio = fei_num/all_num\n",
95 | " \n",
96 | " r_out.append(ratio)\n",
97 | "\n",
98 | " \n",
99 | "\n",
100 | "name=[] \n",
101 | "for num in range(1,124):\n",
102 | " id1 = 'E'+str(num)\n",
103 | " name.append(id1)\n",
104 | " "
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": 55,
110 | "metadata": {},
111 | "outputs": [],
112 | "source": [
113 | "in_num=[]\n",
114 | "for num in range(1,124):\n",
115 | " id1 = 'E'+str(num)\n",
116 | " tem = input1[input1['企业代号']==id1]\n",
117 | " all_num = tem.shape[0]\n",
118 | " \n",
119 | " in_num.append(all_num)\n",
120 | " \n",
121 | "out_num=[]\n",
122 | "for num in range(1,124):\n",
123 | " id1 = 'E'+str(num)\n",
124 | " tem = output1[output1['企业代号']==id1]\n",
125 | " all_num = tem.shape[0]\n",
126 | " \n",
127 | " out_num.append(all_num)"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": 56,
133 | "metadata": {},
134 | "outputs": [
135 | {
136 | "data": {
137 | "text/html": [
138 | "\n",
139 | "\n",
152 | "
\n",
153 | " \n",
154 | " \n",
155 | " | \n",
156 | " 企业名称 | \n",
157 | " 进项发票的作废比例 | \n",
158 | " 销项发票的作废比例 | \n",
159 | " 进项发票数 | \n",
160 | " 销项发票数 | \n",
161 | "
\n",
162 | " \n",
163 | " \n",
164 | " \n",
165 | " 0 | \n",
166 | " E1 | \n",
167 | " 0.055798 | \n",
168 | " 0.027620 | \n",
169 | " 3441 | \n",
170 | " 8110 | \n",
171 | "
\n",
172 | " \n",
173 | " 1 | \n",
174 | " E2 | \n",
175 | " 0.022422 | \n",
176 | " 0.082002 | \n",
177 | " 32156 | \n",
178 | " 12707 | \n",
179 | "
\n",
180 | " \n",
181 | " 2 | \n",
182 | " E3 | \n",
183 | " 0.042535 | \n",
184 | " 0.015993 | \n",
185 | " 4561 | \n",
186 | " 24073 | \n",
187 | "
\n",
188 | " \n",
189 | " 3 | \n",
190 | " E4 | \n",
191 | " 0.066308 | \n",
192 | " 0.085164 | \n",
193 | " 558 | \n",
194 | " 2231 | \n",
195 | "
\n",
196 | " \n",
197 | " 4 | \n",
198 | " E5 | \n",
199 | " 0.039189 | \n",
200 | " 0.051887 | \n",
201 | " 2169 | \n",
202 | " 1060 | \n",
203 | "
\n",
204 | " \n",
205 | " ... | \n",
206 | " ... | \n",
207 | " ... | \n",
208 | " ... | \n",
209 | " ... | \n",
210 | " ... | \n",
211 | "
\n",
212 | " \n",
213 | " 118 | \n",
214 | " E119 | \n",
215 | " 0.003175 | \n",
216 | " 0.142857 | \n",
217 | " 315 | \n",
218 | " 21 | \n",
219 | "
\n",
220 | " \n",
221 | " 119 | \n",
222 | " E120 | \n",
223 | " 0.027778 | \n",
224 | " 0.689655 | \n",
225 | " 36 | \n",
226 | " 29 | \n",
227 | "
\n",
228 | " \n",
229 | " 120 | \n",
230 | " E121 | \n",
231 | " 0.000000 | \n",
232 | " 0.123656 | \n",
233 | " 50 | \n",
234 | " 186 | \n",
235 | "
\n",
236 | " \n",
237 | " 121 | \n",
238 | " E122 | \n",
239 | " 0.020833 | \n",
240 | " 0.135593 | \n",
241 | " 48 | \n",
242 | " 118 | \n",
243 | "
\n",
244 | " \n",
245 | " 122 | \n",
246 | " E123 | \n",
247 | " 0.000000 | \n",
248 | " 0.492308 | \n",
249 | " 3 | \n",
250 | " 65 | \n",
251 | "
\n",
252 | " \n",
253 | "
\n",
254 | "
123 rows × 5 columns
\n",
255 | "
"
256 | ],
257 | "text/plain": [
258 | " 企业名称 进项发票的作废比例 销项发票的作废比例 进项发票数 销项发票数\n",
259 | "0 E1 0.055798 0.027620 3441 8110\n",
260 | "1 E2 0.022422 0.082002 32156 12707\n",
261 | "2 E3 0.042535 0.015993 4561 24073\n",
262 | "3 E4 0.066308 0.085164 558 2231\n",
263 | "4 E5 0.039189 0.051887 2169 1060\n",
264 | ".. ... ... ... ... ...\n",
265 | "118 E119 0.003175 0.142857 315 21\n",
266 | "119 E120 0.027778 0.689655 36 29\n",
267 | "120 E121 0.000000 0.123656 50 186\n",
268 | "121 E122 0.020833 0.135593 48 118\n",
269 | "122 E123 0.000000 0.492308 3 65\n",
270 | "\n",
271 | "[123 rows x 5 columns]"
272 | ]
273 | },
274 | "execution_count": 56,
275 | "metadata": {},
276 | "output_type": "execute_result"
277 | }
278 | ],
279 | "source": [
280 | "data = {'企业名称':name,\n",
281 | " '进项发票的作废比例':r_in ,\n",
282 | " '销项发票的作废比例':r_out ,\n",
283 | " '进项发票数':in_num ,\n",
284 | " '销项发票数':out_num\n",
285 | "}\n",
286 | "\n",
287 | "df=pd.DataFrame(data=data)\n",
288 | "df"
289 | ]
290 | },
291 | {
292 | "cell_type": "code",
293 | "execution_count": 57,
294 | "metadata": {},
295 | "outputs": [],
296 | "source": [
297 | "df.to_csv('附件1作废发票比例.csv',encoding='gbk')"
298 | ]
299 | },
300 | {
301 | "cell_type": "markdown",
302 | "metadata": {},
303 | "source": [
304 | "# 企业文字提取"
305 | ]
306 | },
307 | {
308 | "cell_type": "code",
309 | "execution_count": 59,
310 | "metadata": {},
311 | "outputs": [
312 | {
313 | "data": {
314 | "text/html": [
315 | "\n",
316 | "\n",
329 | "
\n",
330 | " \n",
331 | " \n",
332 | " | \n",
333 | " 企业代号 | \n",
334 | " 企业名称 | \n",
335 | " 信誉评级 | \n",
336 | " 是否违约 | \n",
337 | "
\n",
338 | " \n",
339 | " \n",
340 | " \n",
341 | " 0 | \n",
342 | " E1 | \n",
343 | " ***电器销售有限公司 | \n",
344 | " A | \n",
345 | " 否 | \n",
346 | "
\n",
347 | " \n",
348 | " 1 | \n",
349 | " E2 | \n",
350 | " ***技术有限责任公司 | \n",
351 | " A | \n",
352 | " 否 | \n",
353 | "
\n",
354 | " \n",
355 | " 2 | \n",
356 | " E3 | \n",
357 | " ***电子(中国)有限公司***分公司 | \n",
358 | " C | \n",
359 | " 否 | \n",
360 | "
\n",
361 | " \n",
362 | " 3 | \n",
363 | " E4 | \n",
364 | " ***发展有限责任公司 | \n",
365 | " C | \n",
366 | " 否 | \n",
367 | "
\n",
368 | " \n",
369 | " 4 | \n",
370 | " E5 | \n",
371 | " ***供应链管理有限公司 | \n",
372 | " B | \n",
373 | " 否 | \n",
374 | "
\n",
375 | " \n",
376 | " ... | \n",
377 | " ... | \n",
378 | " ... | \n",
379 | " ... | \n",
380 | " ... | \n",
381 | "
\n",
382 | " \n",
383 | " 118 | \n",
384 | " E119 | \n",
385 | " ***药房 | \n",
386 | " D | \n",
387 | " 是 | \n",
388 | "
\n",
389 | " \n",
390 | " 119 | \n",
391 | " E120 | \n",
392 | " ***陈列广告有限公司 | \n",
393 | " D | \n",
394 | " 是 | \n",
395 | "
\n",
396 | " \n",
397 | " 120 | \n",
398 | " E121 | \n",
399 | " ***药业连锁有限公司***药店 | \n",
400 | " D | \n",
401 | " 是 | \n",
402 | "
\n",
403 | " \n",
404 | " 121 | \n",
405 | " E122 | \n",
406 | " ***商贸有限责任公司 | \n",
407 | " D | \n",
408 | " 是 | \n",
409 | "
\n",
410 | " \n",
411 | " 122 | \n",
412 | " E123 | \n",
413 | " ***创科技有限责任公司 | \n",
414 | " D | \n",
415 | " 是 | \n",
416 | "
\n",
417 | " \n",
418 | "
\n",
419 | "
123 rows × 4 columns
\n",
420 | "
"
421 | ],
422 | "text/plain": [
423 | " 企业代号 企业名称 信誉评级 是否违约\n",
424 | "0 E1 ***电器销售有限公司 A 否\n",
425 | "1 E2 ***技术有限责任公司 A 否\n",
426 | "2 E3 ***电子(中国)有限公司***分公司 C 否\n",
427 | "3 E4 ***发展有限责任公司 C 否\n",
428 | "4 E5 ***供应链管理有限公司 B 否\n",
429 | ".. ... ... ... ...\n",
430 | "118 E119 ***药房 D 是\n",
431 | "119 E120 ***陈列广告有限公司 D 是\n",
432 | "120 E121 ***药业连锁有限公司***药店 D 是\n",
433 | "121 E122 ***商贸有限责任公司 D 是\n",
434 | "122 E123 ***创科技有限责任公司 D 是\n",
435 | "\n",
436 | "[123 rows x 4 columns]"
437 | ]
438 | },
439 | "execution_count": 59,
440 | "metadata": {},
441 | "output_type": "execute_result"
442 | }
443 | ],
444 | "source": [
445 | "data = pd.read_excel('附件1.xlsx',sheet_name='企业信息')\n",
446 | "data"
447 | ]
448 | },
449 | {
450 | "cell_type": "code",
451 | "execution_count": null,
452 | "metadata": {},
453 | "outputs": [],
454 | "source": []
455 | },
456 | {
457 | "cell_type": "code",
458 | "execution_count": null,
459 | "metadata": {},
460 | "outputs": [],
461 | "source": []
462 | },
463 | {
464 | "cell_type": "code",
465 | "execution_count": null,
466 | "metadata": {},
467 | "outputs": [],
468 | "source": []
469 | },
470 | {
471 | "cell_type": "markdown",
472 | "metadata": {},
473 | "source": [
474 | "# 时间序列分析"
475 | ]
476 | },
477 | {
478 | "cell_type": "code",
479 | "execution_count": 67,
480 | "metadata": {},
481 | "outputs": [],
482 | "source": [
483 | "for i in range(len(input1)): # 删除作废发票\n",
484 | " if input1.loc[i,'发票状态']=='作废发票':\n",
485 | " input1.drop(i,inplace=True)"
486 | ]
487 | },
488 | {
489 | "cell_type": "code",
490 | "execution_count": 68,
491 | "metadata": {},
492 | "outputs": [],
493 | "source": [
494 | "for i in range(len(output1)):\n",
495 | " if output1.loc[i,'发票状态']=='作废发票':\n",
496 | " output1.drop(i,inplace=True)"
497 | ]
498 | },
499 | {
500 | "cell_type": "code",
501 | "execution_count": 72,
502 | "metadata": {},
503 | "outputs": [],
504 | "source": [
505 | "input1.reset_index(drop = True,inplace=True)\n",
506 | "output1.reset_index(drop = True,inplace=True)"
507 | ]
508 | },
509 | {
510 | "cell_type": "code",
511 | "execution_count": 73,
512 | "metadata": {},
513 | "outputs": [],
514 | "source": [
515 | "input1['开票日期'] = pd.to_datetime(input1['开票日期'])\n",
516 | "output1['开票日期'] = pd.to_datetime(output1['开票日期'])"
517 | ]
518 | },
519 | {
520 | "cell_type": "code",
521 | "execution_count": 219,
522 | "metadata": {},
523 | "outputs": [],
524 | "source": [
525 | "deal1=[]\n",
526 | "deal2=[]\n",
527 | "deal3=[]\n",
528 | "deal4=[]\n",
529 | "deal5=[]\n",
530 | "for num in range(1,124): # 遍历目标企业\n",
531 | " l_1 = 0\n",
532 | " l_2 = 0\n",
533 | " l_3 = 0 # 一年、两年、三年、四年、五年的交易方\n",
534 | " l_4 = 0\n",
535 | " l_5 = 0\n",
536 | "\n",
537 | " id1 = 'E'+str(num)\n",
538 | " a = input1[input1['企业代号']==id1]\n",
539 | " a = a.reset_index(drop=True)\n",
540 | " other = np.unique(a['销方单位代号']) # 交易方代号的唯一值\n",
541 | " \n",
542 | " year = a['开票日期'][len(a)-1].year - a['开票日期'][0].year+1 \n",
543 | " begin = a['开票日期'][0].year\n",
544 | " \n",
545 | " for i in range(len(other)): # 遍历交易企业\n",
546 | " cou=0\n",
547 | " company = other[i] # 选中的交易企业\n",
548 | " for j in range(year): # 遍历每一年\n",
549 | " tem = a[(a['开票日期'] >str(begin+j) )& (a['开票日期'] \n",
595 | "\n",
608 | "\n",
609 | " \n",
610 | " \n",
611 | " | \n",
612 | " 持续一年期的交易企业个数 | \n",
613 | " 持续二年期的交易企业个数 | \n",
614 | " 持续三年期的交易企业个数 | \n",
615 | " 持续四年前的交易企业个数 | \n",
616 | " 持续五年期的交易企业个数 | \n",
617 | "
\n",
618 | " \n",
619 | " \n",
620 | " \n",
621 | " 0 | \n",
622 | " 332 | \n",
623 | " 74 | \n",
624 | " 27 | \n",
625 | " 3 | \n",
626 | " 0 | \n",
627 | "
\n",
628 | " \n",
629 | " 1 | \n",
630 | " 2424 | \n",
631 | " 780 | \n",
632 | " 314 | \n",
633 | " 103 | \n",
634 | " 0 | \n",
635 | "
\n",
636 | " \n",
637 | " 2 | \n",
638 | " 386 | \n",
639 | " 103 | \n",
640 | " 58 | \n",
641 | " 23 | \n",
642 | " 0 | \n",
643 | "
\n",
644 | " \n",
645 | " 3 | \n",
646 | " 94 | \n",
647 | " 21 | \n",
648 | " 13 | \n",
649 | " 2 | \n",
650 | " 0 | \n",
651 | "
\n",
652 | " \n",
653 | " 4 | \n",
654 | " 324 | \n",
655 | " 47 | \n",
656 | " 19 | \n",
657 | " 2 | \n",
658 | " 0 | \n",
659 | "
\n",
660 | " \n",
661 | " ... | \n",
662 | " ... | \n",
663 | " ... | \n",
664 | " ... | \n",
665 | " ... | \n",
666 | " ... | \n",
667 | "
\n",
668 | " \n",
669 | " 118 | \n",
670 | " 7 | \n",
671 | " 6 | \n",
672 | " 2 | \n",
673 | " 2 | \n",
674 | " 0 | \n",
675 | "
\n",
676 | " \n",
677 | " 119 | \n",
678 | " 13 | \n",
679 | " 3 | \n",
680 | " 0 | \n",
681 | " 0 | \n",
682 | " 0 | \n",
683 | "
\n",
684 | " \n",
685 | " 120 | \n",
686 | " 7 | \n",
687 | " 2 | \n",
688 | " 2 | \n",
689 | " 0 | \n",
690 | " 0 | \n",
691 | "
\n",
692 | " \n",
693 | " 121 | \n",
694 | " 18 | \n",
695 | " 3 | \n",
696 | " 1 | \n",
697 | " 0 | \n",
698 | " 0 | \n",
699 | "
\n",
700 | " \n",
701 | " 122 | \n",
702 | " 1 | \n",
703 | " 1 | \n",
704 | " 0 | \n",
705 | " 0 | \n",
706 | " 0 | \n",
707 | "
\n",
708 | " \n",
709 | "
\n",
710 | "123 rows × 5 columns
\n",
711 | ""
712 | ],
713 | "text/plain": [
714 | " 持续一年期的交易企业个数 持续二年期的交易企业个数 持续三年期的交易企业个数 持续四年前的交易企业个数 持续五年期的交易企业个数\n",
715 | "0 332 74 27 3 0\n",
716 | "1 2424 780 314 103 0\n",
717 | "2 386 103 58 23 0\n",
718 | "3 94 21 13 2 0\n",
719 | "4 324 47 19 2 0\n",
720 | ".. ... ... ... ... ...\n",
721 | "118 7 6 2 2 0\n",
722 | "119 13 3 0 0 0\n",
723 | "120 7 2 2 0 0\n",
724 | "121 18 3 1 0 0\n",
725 | "122 1 1 0 0 0\n",
726 | "\n",
727 | "[123 rows x 5 columns]"
728 | ]
729 | },
730 | "execution_count": 223,
731 | "metadata": {},
732 | "output_type": "execute_result"
733 | }
734 | ],
735 | "source": [
736 | "jx = pd.DataFrame(data=da1)\n",
737 | "jx"
738 | ]
739 | },
740 | {
741 | "cell_type": "code",
742 | "execution_count": 301,
743 | "metadata": {},
744 | "outputs": [],
745 | "source": [
746 | "jx.to_csv('上游持续交易企业数.csv',encoding='gbk')"
747 | ]
748 | },
749 | {
750 | "cell_type": "code",
751 | "execution_count": 230,
752 | "metadata": {},
753 | "outputs": [],
754 | "source": [
755 | "deal1=[]\n",
756 | "deal2=[]\n",
757 | "deal3=[]\n",
758 | "deal4=[]\n",
759 | "for num in range(1,124): # 遍历目标企业\n",
760 | " l_1 = 0\n",
761 | " l_2 = 0\n",
762 | " l_3 = 0 # 一年、两年、三年、四年的交易方\n",
763 | " l_4 = 0\n",
764 | "\n",
765 | " id1 = 'E'+str(num)\n",
766 | " a = output1[output1['企业代号']==id1]\n",
767 | " a = a.reset_index(drop=True)\n",
768 | " other = np.unique(a['购方单位代号']) # 交易方代号的唯一值\n",
769 | " \n",
770 | " year = a['开票日期'][len(a)-1].year - a['开票日期'][0].year+1 \n",
771 | " begin = a['开票日期'][0].year\n",
772 | " \n",
773 | " for i in range(len(other)): # 遍历交易企业\n",
774 | " cou=0\n",
775 | " company = other[i] # 选中的交易企业\n",
776 | " for j in range(year): # 遍历每一年\n",
777 | " tem = a[(a['开票日期'] >str(begin+j) )& (a['开票日期'] \n",
807 | "\n",
820 | "\n",
821 | " \n",
822 | " \n",
823 | " | \n",
824 | " 持续一年期的交易企业个数 | \n",
825 | " 持续二年期的交易企业个数 | \n",
826 | " 持续三年期的交易企业个数 | \n",
827 | " 持续四年前的交易企业个数 | \n",
828 | "
\n",
829 | " \n",
830 | " \n",
831 | " \n",
832 | " 0 | \n",
833 | " 179 | \n",
834 | " 51 | \n",
835 | " 51 | \n",
836 | " 71 | \n",
837 | "
\n",
838 | " \n",
839 | " 1 | \n",
840 | " 1147 | \n",
841 | " 268 | \n",
842 | " 121 | \n",
843 | " 43 | \n",
844 | "
\n",
845 | " \n",
846 | " 2 | \n",
847 | " 48 | \n",
848 | " 48 | \n",
849 | " 25 | \n",
850 | " 14 | \n",
851 | "
\n",
852 | " \n",
853 | " 3 | \n",
854 | " 16 | \n",
855 | " 10 | \n",
856 | " 0 | \n",
857 | " 0 | \n",
858 | "
\n",
859 | " \n",
860 | " 4 | \n",
861 | " 16 | \n",
862 | " 15 | \n",
863 | " 2 | \n",
864 | " 0 | \n",
865 | "
\n",
866 | " \n",
867 | " ... | \n",
868 | " ... | \n",
869 | " ... | \n",
870 | " ... | \n",
871 | " ... | \n",
872 | "
\n",
873 | " \n",
874 | " 118 | \n",
875 | " 12 | \n",
876 | " 1 | \n",
877 | " 1 | \n",
878 | " 0 | \n",
879 | "
\n",
880 | " \n",
881 | " 119 | \n",
882 | " 6 | \n",
883 | " 0 | \n",
884 | " 0 | \n",
885 | " 0 | \n",
886 | "
\n",
887 | " \n",
888 | " 120 | \n",
889 | " 110 | \n",
890 | " 6 | \n",
891 | " 0 | \n",
892 | " 0 | \n",
893 | "
\n",
894 | " \n",
895 | " 121 | \n",
896 | " 67 | \n",
897 | " 2 | \n",
898 | " 0 | \n",
899 | " 0 | \n",
900 | "
\n",
901 | " \n",
902 | " 122 | \n",
903 | " 3 | \n",
904 | " 1 | \n",
905 | " 1 | \n",
906 | " 0 | \n",
907 | "
\n",
908 | " \n",
909 | "
\n",
910 | "123 rows × 4 columns
\n",
911 | ""
912 | ],
913 | "text/plain": [
914 | " 持续一年期的交易企业个数 持续二年期的交易企业个数 持续三年期的交易企业个数 持续四年前的交易企业个数\n",
915 | "0 179 51 51 71\n",
916 | "1 1147 268 121 43\n",
917 | "2 48 48 25 14\n",
918 | "3 16 10 0 0\n",
919 | "4 16 15 2 0\n",
920 | ".. ... ... ... ...\n",
921 | "118 12 1 1 0\n",
922 | "119 6 0 0 0\n",
923 | "120 110 6 0 0\n",
924 | "121 67 2 0 0\n",
925 | "122 3 1 1 0\n",
926 | "\n",
927 | "[123 rows x 4 columns]"
928 | ]
929 | },
930 | "execution_count": 231,
931 | "metadata": {},
932 | "output_type": "execute_result"
933 | }
934 | ],
935 | "source": [
936 | "da2= {'持续一年期的交易企业个数':deal1,\n",
937 | " '持续二年期的交易企业个数': deal2,\n",
938 | " '持续三年期的交易企业个数': deal3,\n",
939 | " '持续四年前的交易企业个数':deal4\n",
940 | " }\n",
941 | "\n",
942 | "xx = pd.DataFrame(data=da2)\n",
943 | "xx "
944 | ]
945 | },
946 | {
947 | "cell_type": "code",
948 | "execution_count": 300,
949 | "metadata": {},
950 | "outputs": [],
951 | "source": [
952 | "xx.to_csv('下游持续交易企业.csv',encoding='gbk')"
953 | ]
954 | },
955 | {
956 | "cell_type": "code",
957 | "execution_count": null,
958 | "metadata": {},
959 | "outputs": [],
960 | "source": []
961 | },
962 | {
963 | "cell_type": "code",
964 | "execution_count": 233,
965 | "metadata": {},
966 | "outputs": [],
967 | "source": [
968 | "in_y=[]\n",
969 | "for num in range(1,124): # 遍历目标企业\n",
970 | " l_1 = 0\n",
971 | " l_2 = 0\n",
972 | " l_3 = 0 # 一年、两年、三年、四年的交易方\n",
973 | " l_4 = 0\n",
974 | " l_5 = 0\n",
975 | "\n",
976 | " id1 = 'E'+str(num)\n",
977 | " a = input1[input1['企业代号']==id1]\n",
978 | " a = a.reset_index(drop=True)\n",
979 | " other = np.unique(a['销方单位代号']) # 交易方代号的唯一值\n",
980 | " \n",
981 | " year = a['开票日期'][len(a)-1].year - a['开票日期'][0].year+1 \n",
982 | " in_y.append(year)\n",
983 | "\n",
984 | "\n",
985 | "\n",
986 | "out_y=[]\n",
987 | "for num in range(1,124): # 遍历目标企业\n",
988 | " l_1 = 0\n",
989 | " l_2 = 0\n",
990 | " l_3 = 0 # 一年、两年、三年、四年的交易方\n",
991 | " l_4 = 0\n",
992 | "\n",
993 | " id1 = 'E'+str(num)\n",
994 | " a = output1[output1['企业代号']==id1]\n",
995 | " a = a.reset_index(drop=True)\n",
996 | " other = np.unique(a['购方单位代号']) # 交易方代号的唯一值\n",
997 | " \n",
998 | " year = a['开票日期'][len(a)-1].year - a['开票日期'][0].year+1 \n",
999 | " out_y.append(year)"
1000 | ]
1001 | },
1002 | {
1003 | "cell_type": "code",
1004 | "execution_count": null,
1005 | "metadata": {},
1006 | "outputs": [],
1007 | "source": []
1008 | },
1009 | {
1010 | "cell_type": "code",
1011 | "execution_count": 245,
1012 | "metadata": {},
1013 | "outputs": [],
1014 | "source": [
1015 | "new_in = input1[(input1['开票日期']<'2020')] # 2020年之前的进项数据\n",
1016 | "new_out = output1[(output1['开票日期']<'2020')]"
1017 | ]
1018 | },
1019 | {
1020 | "cell_type": "code",
1021 | "execution_count": 246,
1022 | "metadata": {},
1023 | "outputs": [
1024 | {
1025 | "data": {
1026 | "text/html": [
1027 | "\n",
1028 | "\n",
1041 | "
\n",
1042 | " \n",
1043 | " \n",
1044 | " | \n",
1045 | " 企业代号 | \n",
1046 | " 发票号码 | \n",
1047 | " 开票日期 | \n",
1048 | " 销方单位代号 | \n",
1049 | " 金额 | \n",
1050 | " 税额 | \n",
1051 | " 价税合计 | \n",
1052 | " 发票状态 | \n",
1053 | "
\n",
1054 | " \n",
1055 | " \n",
1056 | " \n",
1057 | " 0 | \n",
1058 | " E1 | \n",
1059 | " 3390939 | \n",
1060 | " 2017-07-18 | \n",
1061 | " A00297 | \n",
1062 | " -943.40 | \n",
1063 | " -56.60 | \n",
1064 | " -1000.00 | \n",
1065 | " 有效发票 | \n",
1066 | "
\n",
1067 | " \n",
1068 | " 1 | \n",
1069 | " E1 | \n",
1070 | " 3390940 | \n",
1071 | " 2017-07-18 | \n",
1072 | " A00297 | \n",
1073 | " -4780.24 | \n",
1074 | " -286.81 | \n",
1075 | " -5067.05 | \n",
1076 | " 有效发票 | \n",
1077 | "
\n",
1078 | " \n",
1079 | " 2 | \n",
1080 | " E1 | \n",
1081 | " 3390941 | \n",
1082 | " 2017-07-18 | \n",
1083 | " A00297 | \n",
1084 | " 943.40 | \n",
1085 | " 56.60 | \n",
1086 | " 1000.00 | \n",
1087 | " 有效发票 | \n",
1088 | "
\n",
1089 | " \n",
1090 | " 3 | \n",
1091 | " E1 | \n",
1092 | " 3390942 | \n",
1093 | " 2017-07-18 | \n",
1094 | " A00297 | \n",
1095 | " 4780.24 | \n",
1096 | " 286.81 | \n",
1097 | " 5067.05 | \n",
1098 | " 有效发票 | \n",
1099 | "
\n",
1100 | " \n",
1101 | " 4 | \n",
1102 | " E1 | \n",
1103 | " 9902669 | \n",
1104 | " 2017-08-07 | \n",
1105 | " A05061 | \n",
1106 | " 326.21 | \n",
1107 | " 9.79 | \n",
1108 | " 336.00 | \n",
1109 | " 有效发票 | \n",
1110 | "
\n",
1111 | " \n",
1112 | " ... | \n",
1113 | " ... | \n",
1114 | " ... | \n",
1115 | " ... | \n",
1116 | " ... | \n",
1117 | " ... | \n",
1118 | " ... | \n",
1119 | " ... | \n",
1120 | " ... | \n",
1121 | "
\n",
1122 | " \n",
1123 | " 203333 | \n",
1124 | " E122 | \n",
1125 | " 42055639 | \n",
1126 | " 2019-03-25 | \n",
1127 | " A13332 | \n",
1128 | " 17636.09 | \n",
1129 | " 1058.17 | \n",
1130 | " 18694.26 | \n",
1131 | " 有效发票 | \n",
1132 | "
\n",
1133 | " \n",
1134 | " 203334 | \n",
1135 | " E122 | \n",
1136 | " 54706234 | \n",
1137 | " 2019-04-17 | \n",
1138 | " A08967 | \n",
1139 | " 223.30 | \n",
1140 | " 6.70 | \n",
1141 | " 230.00 | \n",
1142 | " 有效发票 | \n",
1143 | "
\n",
1144 | " \n",
1145 | " 203336 | \n",
1146 | " E123 | \n",
1147 | " 38493295 | \n",
1148 | " 2017-12-15 | \n",
1149 | " A03624 | \n",
1150 | " 264.15 | \n",
1151 | " 15.85 | \n",
1152 | " 280.00 | \n",
1153 | " 有效发票 | \n",
1154 | "
\n",
1155 | " \n",
1156 | " 203337 | \n",
1157 | " E123 | \n",
1158 | " 95472001 | \n",
1159 | " 2018-12-29 | \n",
1160 | " A03626 | \n",
1161 | " 264.15 | \n",
1162 | " 15.85 | \n",
1163 | " 280.00 | \n",
1164 | " 有效发票 | \n",
1165 | "
\n",
1166 | " \n",
1167 | " 203338 | \n",
1168 | " E123 | \n",
1169 | " 54469883 | \n",
1170 | " 2019-12-18 | \n",
1171 | " A03626 | \n",
1172 | " 264.15 | \n",
1173 | " 15.85 | \n",
1174 | " 280.00 | \n",
1175 | " 有效发票 | \n",
1176 | "
\n",
1177 | " \n",
1178 | "
\n",
1179 | "
198371 rows × 8 columns
\n",
1180 | "
"
1181 | ],
1182 | "text/plain": [
1183 | " 企业代号 发票号码 开票日期 销方单位代号 金额 税额 价税合计 发票状态\n",
1184 | "0 E1 3390939 2017-07-18 A00297 -943.40 -56.60 -1000.00 有效发票\n",
1185 | "1 E1 3390940 2017-07-18 A00297 -4780.24 -286.81 -5067.05 有效发票\n",
1186 | "2 E1 3390941 2017-07-18 A00297 943.40 56.60 1000.00 有效发票\n",
1187 | "3 E1 3390942 2017-07-18 A00297 4780.24 286.81 5067.05 有效发票\n",
1188 | "4 E1 9902669 2017-08-07 A05061 326.21 9.79 336.00 有效发票\n",
1189 | "... ... ... ... ... ... ... ... ...\n",
1190 | "203333 E122 42055639 2019-03-25 A13332 17636.09 1058.17 18694.26 有效发票\n",
1191 | "203334 E122 54706234 2019-04-17 A08967 223.30 6.70 230.00 有效发票\n",
1192 | "203336 E123 38493295 2017-12-15 A03624 264.15 15.85 280.00 有效发票\n",
1193 | "203337 E123 95472001 2018-12-29 A03626 264.15 15.85 280.00 有效发票\n",
1194 | "203338 E123 54469883 2019-12-18 A03626 264.15 15.85 280.00 有效发票\n",
1195 | "\n",
1196 | "[198371 rows x 8 columns]"
1197 | ]
1198 | },
1199 | "execution_count": 246,
1200 | "metadata": {},
1201 | "output_type": "execute_result"
1202 | }
1203 | ],
1204 | "source": [
1205 | "new_in"
1206 | ]
1207 | },
1208 | {
1209 | "cell_type": "code",
1210 | "execution_count": 292,
1211 | "metadata": {},
1212 | "outputs": [],
1213 | "source": [
1214 | "ur = []\n",
1215 | "\n",
1216 | "dd = []\n",
1217 | "for num in range(1,124): # 遍历目标企业\n",
1218 | " \n",
1219 | " id1 = 'E'+str(num)\n",
1220 | " a = new_in[new_in['企业代号']==id1]\n",
1221 | " a = a.reset_index(drop=True)\n",
1222 | " \n",
1223 | " end_1 = a['开票日期'][len(a)-1].year\n",
1224 | " begin_1 = a['开票日期'][0].year\n",
1225 | " \n",
1226 | " begin_ji_1 = a[(a['开票日期']>str(begin_1))&(a['开票日期']str(end_1))&(a['开票日期']str(begin_2))&(b['开票日期']str(end_2))&(b['开票日期']\n",
1399 | "\n",
1412 | "\n",
1413 | " \n",
1414 | " \n",
1415 | " | \n",
1416 | " 绝对数变化 | \n",
1417 | " 比例变化 | \n",
1418 | "
\n",
1419 | " \n",
1420 | " \n",
1421 | " \n",
1422 | " 0 | \n",
1423 | " -1.938721e+08 | \n",
1424 | " 1.720614 | \n",
1425 | "
\n",
1426 | " \n",
1427 | " 1 | \n",
1428 | " 1.103025e+08 | \n",
1429 | " 2.197674 | \n",
1430 | "
\n",
1431 | " \n",
1432 | " 2 | \n",
1433 | " -1.154773e+08 | \n",
1434 | " 0.516353 | \n",
1435 | "
\n",
1436 | " \n",
1437 | " 3 | \n",
1438 | " -1.168509e+09 | \n",
1439 | " 0.053642 | \n",
1440 | "
\n",
1441 | " \n",
1442 | " 4 | \n",
1443 | " 4.068217e+06 | \n",
1444 | " 85.227087 | \n",
1445 | "
\n",
1446 | " \n",
1447 | " ... | \n",
1448 | " ... | \n",
1449 | " ... | \n",
1450 | "
\n",
1451 | " \n",
1452 | " 118 | \n",
1453 | " -6.861922e+04 | \n",
1454 | " 6.999117 | \n",
1455 | "
\n",
1456 | " \n",
1457 | " 119 | \n",
1458 | " 8.830950e+03 | \n",
1459 | " 1.139033 | \n",
1460 | "
\n",
1461 | " \n",
1462 | " 120 | \n",
1463 | " 6.906522e+05 | \n",
1464 | " -0.075122 | \n",
1465 | "
\n",
1466 | " \n",
1467 | " 121 | \n",
1468 | " -2.734729e+04 | \n",
1469 | " -0.681496 | \n",
1470 | "
\n",
1471 | " \n",
1472 | " 122 | \n",
1473 | " 1.745384e+04 | \n",
1474 | " 1.824361 | \n",
1475 | "
\n",
1476 | " \n",
1477 | "
\n",
1478 | "123 rows × 2 columns
\n",
1479 | ""
1480 | ],
1481 | "text/plain": [
1482 | " 绝对数变化 比例变化\n",
1483 | "0 -1.938721e+08 1.720614\n",
1484 | "1 1.103025e+08 2.197674\n",
1485 | "2 -1.154773e+08 0.516353\n",
1486 | "3 -1.168509e+09 0.053642\n",
1487 | "4 4.068217e+06 85.227087\n",
1488 | ".. ... ...\n",
1489 | "118 -6.861922e+04 6.999117\n",
1490 | "119 8.830950e+03 1.139033\n",
1491 | "120 6.906522e+05 -0.075122\n",
1492 | "121 -2.734729e+04 -0.681496\n",
1493 | "122 1.745384e+04 1.824361\n",
1494 | "\n",
1495 | "[123 rows x 2 columns]"
1496 | ]
1497 | },
1498 | "execution_count": 294,
1499 | "metadata": {},
1500 | "output_type": "execute_result"
1501 | }
1502 | ],
1503 | "source": [
1504 | "data3 = {'绝对数变化':dd,\n",
1505 | " '比例变化':ur}\n",
1506 | "df3 = pd.DataFrame(data=data3)\n",
1507 | "df3"
1508 | ]
1509 | },
1510 | {
1511 | "cell_type": "code",
1512 | "execution_count": 297,
1513 | "metadata": {
1514 | "scrolled": true
1515 | },
1516 | "outputs": [
1517 | {
1518 | "data": {
1519 | "text/html": [
1520 | "\n",
1521 | "\n",
1534 | "
\n",
1535 | " \n",
1536 | " \n",
1537 | " | \n",
1538 | " 绝对数变化 | \n",
1539 | " 比例变化 | \n",
1540 | " 是否扭亏为盈利 | \n",
1541 | " 是否变为亏损 | \n",
1542 | "
\n",
1543 | " \n",
1544 | " \n",
1545 | " \n",
1546 | " 0 | \n",
1547 | " -1.938721e+08 | \n",
1548 | " 1.720614 | \n",
1549 | " 0.0 | \n",
1550 | " 0.0 | \n",
1551 | "
\n",
1552 | " \n",
1553 | " 1 | \n",
1554 | " 1.103025e+08 | \n",
1555 | " 2.197674 | \n",
1556 | " 0.0 | \n",
1557 | " 0.0 | \n",
1558 | "
\n",
1559 | " \n",
1560 | " 2 | \n",
1561 | " -1.154773e+08 | \n",
1562 | " 0.516353 | \n",
1563 | " 0.0 | \n",
1564 | " 0.0 | \n",
1565 | "
\n",
1566 | " \n",
1567 | " 3 | \n",
1568 | " -1.168509e+09 | \n",
1569 | " 0.053642 | \n",
1570 | " 0.0 | \n",
1571 | " 0.0 | \n",
1572 | "
\n",
1573 | " \n",
1574 | " 4 | \n",
1575 | " 4.068217e+06 | \n",
1576 | " 85.227087 | \n",
1577 | " 0.0 | \n",
1578 | " 0.0 | \n",
1579 | "
\n",
1580 | " \n",
1581 | " ... | \n",
1582 | " ... | \n",
1583 | " ... | \n",
1584 | " ... | \n",
1585 | " ... | \n",
1586 | "
\n",
1587 | " \n",
1588 | " 118 | \n",
1589 | " -6.861922e+04 | \n",
1590 | " 6.999117 | \n",
1591 | " 0.0 | \n",
1592 | " 0.0 | \n",
1593 | "
\n",
1594 | " \n",
1595 | " 119 | \n",
1596 | " 8.830950e+03 | \n",
1597 | " 1.139033 | \n",
1598 | " 0.0 | \n",
1599 | " 0.0 | \n",
1600 | "
\n",
1601 | " \n",
1602 | " 120 | \n",
1603 | " 6.906522e+05 | \n",
1604 | " -0.075122 | \n",
1605 | " 1.0 | \n",
1606 | " 0.0 | \n",
1607 | "
\n",
1608 | " \n",
1609 | " 121 | \n",
1610 | " -2.734729e+04 | \n",
1611 | " -0.681496 | \n",
1612 | " 0.0 | \n",
1613 | " 1.0 | \n",
1614 | "
\n",
1615 | " \n",
1616 | " 122 | \n",
1617 | " 1.745384e+04 | \n",
1618 | " 1.824361 | \n",
1619 | " 0.0 | \n",
1620 | " 0.0 | \n",
1621 | "
\n",
1622 | " \n",
1623 | "
\n",
1624 | "
123 rows × 4 columns
\n",
1625 | "
"
1626 | ],
1627 | "text/plain": [
1628 | " 绝对数变化 比例变化 是否扭亏为盈利 是否变为亏损\n",
1629 | "0 -1.938721e+08 1.720614 0.0 0.0\n",
1630 | "1 1.103025e+08 2.197674 0.0 0.0\n",
1631 | "2 -1.154773e+08 0.516353 0.0 0.0\n",
1632 | "3 -1.168509e+09 0.053642 0.0 0.0\n",
1633 | "4 4.068217e+06 85.227087 0.0 0.0\n",
1634 | ".. ... ... ... ...\n",
1635 | "118 -6.861922e+04 6.999117 0.0 0.0\n",
1636 | "119 8.830950e+03 1.139033 0.0 0.0\n",
1637 | "120 6.906522e+05 -0.075122 1.0 0.0\n",
1638 | "121 -2.734729e+04 -0.681496 0.0 1.0\n",
1639 | "122 1.745384e+04 1.824361 0.0 0.0\n",
1640 | "\n",
1641 | "[123 rows x 4 columns]"
1642 | ]
1643 | },
1644 | "execution_count": 297,
1645 | "metadata": {},
1646 | "output_type": "execute_result"
1647 | }
1648 | ],
1649 | "source": [
1650 | "for i in range(len(df3)):\n",
1651 | " if (df3.loc[i,'绝对数变化']>0) & (df3.loc[i,'比例变化']<0): # 说明扭亏为盈\n",
1652 | " df3.loc[i,'是否扭亏为盈利']=1\n",
1653 | " else:\n",
1654 | " df3.loc[i,'是否扭亏为盈利']=0\n",
1655 | "\n",
1656 | "for i in range(len(df3)):\n",
1657 | " if (df3.loc[i,'绝对数变化']<0) & (df3.loc[i,'比例变化']<0): # 说明由盈转亏\n",
1658 | " df3.loc[i,'是否变为亏损']=1\n",
1659 | " else:\n",
1660 | " df3.loc[i,'是否变为亏损']=0 \n",
1661 | "\n",
1662 | "df3"
1663 | ]
1664 | },
1665 | {
1666 | "cell_type": "code",
1667 | "execution_count": 299,
1668 | "metadata": {},
1669 | "outputs": [],
1670 | "source": [
1671 | "df3.to_csv('发展程度.csv',encoding='gbk')"
1672 | ]
1673 | },
1674 | {
1675 | "cell_type": "code",
1676 | "execution_count": 290,
1677 | "metadata": {},
1678 | "outputs": [],
1679 | "source": [
1680 | "num=5\n",
1681 | "id1 = 'E'+str(num)\n",
1682 | "a = new_in[new_in['企业代号']==id1]\n",
1683 | "a = a.reset_index(drop=True)\n",
1684 | " \n",
1685 | "end_1 = a['开票日期'][len(a)-1].year\n",
1686 | "begin_1 = a['开票日期'][0].year\n",
1687 | " \n",
1688 | "begin_ji_1 = a[(a['开票日期']>str(begin_1))&(a['开票日期']str(end_1))&(a['开票日期']str(begin_2))&(b['开票日期']str(end_2))&(b['开票日期']\n",
1957 | "\n",
1970 | "\n",
1971 | " \n",
1972 | " \n",
1973 | " | \n",
1974 | " 企业名称 | \n",
1975 | " 进项发票的作废比例 | \n",
1976 | " 销项发票的作废比例 | \n",
1977 | "
\n",
1978 | " \n",
1979 | " \n",
1980 | " \n",
1981 | " 0 | \n",
1982 | " E124 | \n",
1983 | " 0.123313 | \n",
1984 | " 0.150039 | \n",
1985 | "
\n",
1986 | " \n",
1987 | " 1 | \n",
1988 | " E125 | \n",
1989 | " 0.123817 | \n",
1990 | " 0.134796 | \n",
1991 | "
\n",
1992 | " \n",
1993 | " 2 | \n",
1994 | " E126 | \n",
1995 | " 0.033771 | \n",
1996 | " 0.166227 | \n",
1997 | "
\n",
1998 | " \n",
1999 | " 3 | \n",
2000 | " E127 | \n",
2001 | " 0.017931 | \n",
2002 | " 0.030303 | \n",
2003 | "
\n",
2004 | " \n",
2005 | " 4 | \n",
2006 | " E128 | \n",
2007 | " 0.027312 | \n",
2008 | " 0.074900 | \n",
2009 | "
\n",
2010 | " \n",
2011 | " ... | \n",
2012 | " ... | \n",
2013 | " ... | \n",
2014 | " ... | \n",
2015 | "
\n",
2016 | " \n",
2017 | " 297 | \n",
2018 | " E421 | \n",
2019 | " 0.000000 | \n",
2020 | " 0.034483 | \n",
2021 | "
\n",
2022 | " \n",
2023 | " 298 | \n",
2024 | " E422 | \n",
2025 | " 0.000000 | \n",
2026 | " 0.100000 | \n",
2027 | "
\n",
2028 | " \n",
2029 | " 299 | \n",
2030 | " E423 | \n",
2031 | " 0.000000 | \n",
2032 | " 0.142857 | \n",
2033 | "
\n",
2034 | " \n",
2035 | " 300 | \n",
2036 | " E424 | \n",
2037 | " 0.000000 | \n",
2038 | " 0.139535 | \n",
2039 | "
\n",
2040 | " \n",
2041 | " 301 | \n",
2042 | " E425 | \n",
2043 | " 0.016949 | \n",
2044 | " 0.444444 | \n",
2045 | "
\n",
2046 | " \n",
2047 | "
\n",
2048 | "302 rows × 3 columns
\n",
2049 | ""
2050 | ],
2051 | "text/plain": [
2052 | " 企业名称 进项发票的作废比例 销项发票的作废比例\n",
2053 | "0 E124 0.123313 0.150039\n",
2054 | "1 E125 0.123817 0.134796\n",
2055 | "2 E126 0.033771 0.166227\n",
2056 | "3 E127 0.017931 0.030303\n",
2057 | "4 E128 0.027312 0.074900\n",
2058 | ".. ... ... ...\n",
2059 | "297 E421 0.000000 0.034483\n",
2060 | "298 E422 0.000000 0.100000\n",
2061 | "299 E423 0.000000 0.142857\n",
2062 | "300 E424 0.000000 0.139535\n",
2063 | "301 E425 0.016949 0.444444\n",
2064 | "\n",
2065 | "[302 rows x 3 columns]"
2066 | ]
2067 | },
2068 | "execution_count": 10,
2069 | "metadata": {},
2070 | "output_type": "execute_result"
2071 | }
2072 | ],
2073 | "source": [
2074 | "data = {'企业名称':name,\n",
2075 | " '进项发票的作废比例':r_in ,\n",
2076 | " '销项发票的作废比例':r_out \n",
2077 | "}\n",
2078 | "\n",
2079 | "df=pd.DataFrame(data=data)\n",
2080 | "df"
2081 | ]
2082 | },
2083 | {
2084 | "cell_type": "code",
2085 | "execution_count": 11,
2086 | "metadata": {},
2087 | "outputs": [],
2088 | "source": [
2089 | "df.to_csv('第二问作废比例.csv',encoding='gbk')"
2090 | ]
2091 | },
2092 | {
2093 | "cell_type": "code",
2094 | "execution_count": 12,
2095 | "metadata": {},
2096 | "outputs": [],
2097 | "source": [
2098 | "new_in = input1[(input1['开票日期']<'2020')] # 2020年之前的进项数据\n",
2099 | "new_out = output1[(output1['开票日期']<'2020')]"
2100 | ]
2101 | },
2102 | {
2103 | "cell_type": "code",
2104 | "execution_count": 13,
2105 | "metadata": {},
2106 | "outputs": [],
2107 | "source": [
2108 | "ur = []\n",
2109 | "\n",
2110 | "dd = []\n",
2111 | "for num in range(1,303): # 遍历目标企业\n",
2112 | " \n",
2113 | " id1 = 'E'+str(num+123)\n",
2114 | " a = new_in[new_in['企业代号']==id1]\n",
2115 | " a = a.reset_index(drop=True)\n",
2116 | " \n",
2117 | " end_1 = a['开票日期'][len(a)-1].year\n",
2118 | " begin_1 = a['开票日期'][0].year\n",
2119 | " \n",
2120 | " begin_ji_1 = a[(a['开票日期']>str(begin_1))&(a['开票日期']str(end_1))&(a['开票日期']str(begin_2))&(b['开票日期']str(end_2))&(b['开票日期']\n",
2151 | "\n",
2164 | "\n",
2165 | " \n",
2166 | " \n",
2167 | " | \n",
2168 | " 绝对数变化 | \n",
2169 | " 比例变化 | \n",
2170 | "
\n",
2171 | " \n",
2172 | " \n",
2173 | " \n",
2174 | " 0 | \n",
2175 | " -1.065813e+08 | \n",
2176 | " -2.086819 | \n",
2177 | "
\n",
2178 | " \n",
2179 | " 1 | \n",
2180 | " -1.697853e+08 | \n",
2181 | " -1.069323 | \n",
2182 | "
\n",
2183 | " \n",
2184 | " 2 | \n",
2185 | " 1.540513e+08 | \n",
2186 | " 2.527125 | \n",
2187 | "
\n",
2188 | " \n",
2189 | " 3 | \n",
2190 | " -8.365728e+07 | \n",
2191 | " 0.677000 | \n",
2192 | "
\n",
2193 | " \n",
2194 | " 4 | \n",
2195 | " 8.388073e+07 | \n",
2196 | " 2.918189 | \n",
2197 | "
\n",
2198 | " \n",
2199 | " ... | \n",
2200 | " ... | \n",
2201 | " ... | \n",
2202 | "
\n",
2203 | " \n",
2204 | " 297 | \n",
2205 | " -5.554860e+04 | \n",
2206 | " 0.110407 | \n",
2207 | "
\n",
2208 | " \n",
2209 | " 298 | \n",
2210 | " 1.892226e+04 | \n",
2211 | " 6.340986 | \n",
2212 | "
\n",
2213 | " \n",
2214 | " 299 | \n",
2215 | " 1.143155e+04 | \n",
2216 | " 0.214670 | \n",
2217 | "
\n",
2218 | " \n",
2219 | " 300 | \n",
2220 | " -8.175862e+04 | \n",
2221 | " 2.007337 | \n",
2222 | "
\n",
2223 | " \n",
2224 | " 301 | \n",
2225 | " -1.218773e+05 | \n",
2226 | " -0.132198 | \n",
2227 | "
\n",
2228 | " \n",
2229 | "
\n",
2230 | "302 rows × 2 columns
\n",
2231 | ""
2232 | ],
2233 | "text/plain": [
2234 | " 绝对数变化 比例变化\n",
2235 | "0 -1.065813e+08 -2.086819\n",
2236 | "1 -1.697853e+08 -1.069323\n",
2237 | "2 1.540513e+08 2.527125\n",
2238 | "3 -8.365728e+07 0.677000\n",
2239 | "4 8.388073e+07 2.918189\n",
2240 | ".. ... ...\n",
2241 | "297 -5.554860e+04 0.110407\n",
2242 | "298 1.892226e+04 6.340986\n",
2243 | "299 1.143155e+04 0.214670\n",
2244 | "300 -8.175862e+04 2.007337\n",
2245 | "301 -1.218773e+05 -0.132198\n",
2246 | "\n",
2247 | "[302 rows x 2 columns]"
2248 | ]
2249 | },
2250 | "execution_count": 14,
2251 | "metadata": {},
2252 | "output_type": "execute_result"
2253 | }
2254 | ],
2255 | "source": [
2256 | "data3 = {'绝对数变化':dd,\n",
2257 | " '比例变化':ur}\n",
2258 | "df3 = pd.DataFrame(data=data3)\n",
2259 | "df3"
2260 | ]
2261 | },
2262 | {
2263 | "cell_type": "code",
2264 | "execution_count": 15,
2265 | "metadata": {},
2266 | "outputs": [
2267 | {
2268 | "data": {
2269 | "text/html": [
2270 | "\n",
2271 | "\n",
2284 | "
\n",
2285 | " \n",
2286 | " \n",
2287 | " | \n",
2288 | " 绝对数变化 | \n",
2289 | " 比例变化 | \n",
2290 | " 是否扭亏为盈利 | \n",
2291 | " 是否变为亏损 | \n",
2292 | "
\n",
2293 | " \n",
2294 | " \n",
2295 | " \n",
2296 | " 0 | \n",
2297 | " -1.065813e+08 | \n",
2298 | " -2.086819 | \n",
2299 | " 0.0 | \n",
2300 | " 1.0 | \n",
2301 | "
\n",
2302 | " \n",
2303 | " 1 | \n",
2304 | " -1.697853e+08 | \n",
2305 | " -1.069323 | \n",
2306 | " 0.0 | \n",
2307 | " 1.0 | \n",
2308 | "
\n",
2309 | " \n",
2310 | " 2 | \n",
2311 | " 1.540513e+08 | \n",
2312 | " 2.527125 | \n",
2313 | " 0.0 | \n",
2314 | " 0.0 | \n",
2315 | "
\n",
2316 | " \n",
2317 | " 3 | \n",
2318 | " -8.365728e+07 | \n",
2319 | " 0.677000 | \n",
2320 | " 0.0 | \n",
2321 | " 0.0 | \n",
2322 | "
\n",
2323 | " \n",
2324 | " 4 | \n",
2325 | " 8.388073e+07 | \n",
2326 | " 2.918189 | \n",
2327 | " 0.0 | \n",
2328 | " 0.0 | \n",
2329 | "
\n",
2330 | " \n",
2331 | " ... | \n",
2332 | " ... | \n",
2333 | " ... | \n",
2334 | " ... | \n",
2335 | " ... | \n",
2336 | "
\n",
2337 | " \n",
2338 | " 297 | \n",
2339 | " -5.554860e+04 | \n",
2340 | " 0.110407 | \n",
2341 | " 0.0 | \n",
2342 | " 0.0 | \n",
2343 | "
\n",
2344 | " \n",
2345 | " 298 | \n",
2346 | " 1.892226e+04 | \n",
2347 | " 6.340986 | \n",
2348 | " 0.0 | \n",
2349 | " 0.0 | \n",
2350 | "
\n",
2351 | " \n",
2352 | " 299 | \n",
2353 | " 1.143155e+04 | \n",
2354 | " 0.214670 | \n",
2355 | " 0.0 | \n",
2356 | " 0.0 | \n",
2357 | "
\n",
2358 | " \n",
2359 | " 300 | \n",
2360 | " -8.175862e+04 | \n",
2361 | " 2.007337 | \n",
2362 | " 0.0 | \n",
2363 | " 0.0 | \n",
2364 | "
\n",
2365 | " \n",
2366 | " 301 | \n",
2367 | " -1.218773e+05 | \n",
2368 | " -0.132198 | \n",
2369 | " 0.0 | \n",
2370 | " 1.0 | \n",
2371 | "
\n",
2372 | " \n",
2373 | "
\n",
2374 | "
302 rows × 4 columns
\n",
2375 | "
"
2376 | ],
2377 | "text/plain": [
2378 | " 绝对数变化 比例变化 是否扭亏为盈利 是否变为亏损\n",
2379 | "0 -1.065813e+08 -2.086819 0.0 1.0\n",
2380 | "1 -1.697853e+08 -1.069323 0.0 1.0\n",
2381 | "2 1.540513e+08 2.527125 0.0 0.0\n",
2382 | "3 -8.365728e+07 0.677000 0.0 0.0\n",
2383 | "4 8.388073e+07 2.918189 0.0 0.0\n",
2384 | ".. ... ... ... ...\n",
2385 | "297 -5.554860e+04 0.110407 0.0 0.0\n",
2386 | "298 1.892226e+04 6.340986 0.0 0.0\n",
2387 | "299 1.143155e+04 0.214670 0.0 0.0\n",
2388 | "300 -8.175862e+04 2.007337 0.0 0.0\n",
2389 | "301 -1.218773e+05 -0.132198 0.0 1.0\n",
2390 | "\n",
2391 | "[302 rows x 4 columns]"
2392 | ]
2393 | },
2394 | "execution_count": 15,
2395 | "metadata": {},
2396 | "output_type": "execute_result"
2397 | }
2398 | ],
2399 | "source": [
2400 | "for i in range(len(df3)):\n",
2401 | " if (df3.loc[i,'绝对数变化']>0) & (df3.loc[i,'比例变化']<0): # 说明扭亏为盈\n",
2402 | " df3.loc[i,'是否扭亏为盈利']=1\n",
2403 | " else:\n",
2404 | " df3.loc[i,'是否扭亏为盈利']=0\n",
2405 | "\n",
2406 | "for i in range(len(df3)):\n",
2407 | " if (df3.loc[i,'绝对数变化']<0) & (df3.loc[i,'比例变化']<0): # 说明变为亏损\n",
2408 | " df3.loc[i,'是否变为亏损']=1\n",
2409 | " else:\n",
2410 | " df3.loc[i,'是否变为亏损']=0 \n",
2411 | "\n",
2412 | "df3"
2413 | ]
2414 | },
2415 | {
2416 | "cell_type": "code",
2417 | "execution_count": 16,
2418 | "metadata": {},
2419 | "outputs": [],
2420 | "source": [
2421 | "df3.to_csv('第二问绝对数和比例变化.csv',encoding='gbk')"
2422 | ]
2423 | },
2424 | {
2425 | "cell_type": "code",
2426 | "execution_count": null,
2427 | "metadata": {},
2428 | "outputs": [],
2429 | "source": []
2430 | }
2431 | ],
2432 | "metadata": {
2433 | "kernelspec": {
2434 | "display_name": "Python [conda env:root] *",
2435 | "language": "python",
2436 | "name": "conda-root-py"
2437 | },
2438 | "language_info": {
2439 | "codemirror_mode": {
2440 | "name": "ipython",
2441 | "version": 3
2442 | },
2443 | "file_extension": ".py",
2444 | "mimetype": "text/x-python",
2445 | "name": "python",
2446 | "nbconvert_exporter": "python",
2447 | "pygments_lexer": "ipython3",
2448 | "version": "3.7.6"
2449 | },
2450 | "toc": {
2451 | "base_numbering": 1,
2452 | "nav_menu": {},
2453 | "number_sections": true,
2454 | "sideBar": true,
2455 | "skip_h1_title": false,
2456 | "title_cell": "Table of Contents",
2457 | "title_sidebar": "Contents",
2458 | "toc_cell": false,
2459 | "toc_position": {},
2460 | "toc_section_display": true,
2461 | "toc_window_display": false
2462 | }
2463 | },
2464 | "nbformat": 4,
2465 | "nbformat_minor": 4
2466 | }
2467 |
--------------------------------------------------------------------------------
/论文.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leek-emperor/2020-CMCM-C/1d8a26c3a095726d45e107581c4bb204992b6e28/论文.pdf
--------------------------------------------------------------------------------