├── .gitignore ├── AML_final_poster.pdf ├── README.md ├── feature_select_corr_ML ├── Mlp.py ├── Mlp.txt ├── SVM.py ├── SVM.txt ├── degree_2_logistic_regression.py ├── degree_2_logistic_regression.txt ├── degree_2_polynominal_regression.py ├── degree_2_polynominal_regression.txt ├── deleted.txt ├── linear_regression.py ├── linear_regression.txt ├── logistic_regression.py ├── logistic_regression.txt ├── logistic_regressionCV.py ├── logistic_regressionCV.txt ├── random_forest.py ├── random_forest.txt ├── run_list.sh └── shared.py ├── feature_select_pca_ML ├── Mlp.py ├── Mlp.txt ├── SVM.py ├── SVM.txt ├── degree_2_logistic_regression.py ├── degree_2_logistic_regression.txt ├── degree_2_polynominal_regression.py ├── degree_2_polynominal_regression.txt ├── linear_regression.py ├── linear_regression.txt ├── logistic_regression.py ├── logistic_regression.txt ├── logistic_regressionCV.py ├── logistic_regressionCV.txt ├── random_forest.py ├── random_forest.txt ├── run_list.sh └── shared.py ├── image ├── Feature000.png ├── Feature001.png ├── Feature002.png ├── Feature003.png ├── Feature004.png ├── Feature005.png ├── Feature006.png ├── Feature007.png ├── Feature008.png ├── Feature009.png ├── Feature010.png ├── Feature011.png ├── Feature012.png ├── Feature013.png ├── Feature014.png ├── Feature015.png ├── Feature016.png ├── Feature017.png ├── Feature018.png ├── Feature019.png ├── Feature020.png ├── Feature021.png ├── Feature022.png ├── Feature023.png ├── Feature024.png ├── Feature025.png ├── Feature026.png ├── Feature027.png ├── Feature028.png ├── Feature029.png ├── Feature030.png ├── Feature031.png ├── Feature032.png ├── Feature033.png ├── Feature034.png ├── Feature035.png ├── Feature036.png ├── Feature037.png ├── Feature038.png ├── Feature039.png ├── Feature040.png ├── Feature041.png ├── Feature042.png ├── Feature043.png ├── Feature044.png ├── Feature045.png ├── Feature046.png ├── Feature047.png ├── Feature048.png ├── Feature049.png ├── Feature050.png ├── Feature051.png ├── 
Feature052.png ├── Feature053.png ├── Feature054.png ├── Feature055.png ├── Feature056.png ├── Feature057.png ├── Feature058.png ├── Feature059.png ├── Feature060.png ├── Feature061.png ├── Feature062.png ├── Feature063.png ├── Feature064.png ├── Feature065.png ├── Feature066.png ├── Feature067.png ├── Feature068.png ├── Feature069.png ├── Feature070.png ├── Feature071.png ├── Feature072.png ├── Feature073.png ├── Feature074.png ├── Feature075.png ├── Feature076.png ├── Feature077.png ├── Feature078.png ├── Feature079.png ├── Feature080.png ├── Feature081.png ├── Feature082.png ├── Feature083.png ├── Feature084.png ├── Feature085.png ├── Feature086.png ├── Feature087.png ├── Feature088.png ├── Feature089.png ├── Feature090.png ├── Feature091.png ├── Feature092.png ├── Feature093.png ├── Feature094.png ├── Feature095.png ├── Feature096.png ├── Feature097.png ├── Feature098.png ├── Feature099.png ├── Feature100.png ├── Feature101.png ├── Feature102.png ├── Feature103.png ├── Feature104.png ├── Feature105.png ├── Feature106.png ├── Feature107.png ├── Feature108.png ├── Feature109.png ├── Feature110.png ├── Feature111.png ├── Feature112.png ├── Feature113.png ├── Feature114.png ├── Feature115.png ├── Feature116.png ├── Feature117.png ├── Feature118.png ├── Feature119.png ├── Feature120.png ├── Feature121.png ├── Feature122.png ├── Feature123.png ├── Feature124.png ├── Feature125.png ├── Feature126.png ├── Feature127.png ├── Feature128.png ├── Feature129.png ├── Feature130.png ├── Feature131.png ├── Feature132.png ├── Feature133.png ├── Feature134.png ├── Feature135.png ├── Feature136.png ├── Feature137.png ├── Feature138.png ├── Feature139.png ├── Feature140.png ├── Feature141.png ├── Feature142.png ├── Feature143.png ├── Feature144.png ├── Feature145.png ├── Feature146.png ├── Feature147.png ├── Feature148.png ├── Feature149.png ├── Feature150.png ├── Feature151.png ├── Feature152.png ├── Feature153.png ├── Feature154.png ├── Feature155.png ├── Feature156.png ├── 
Feature157.png ├── Feature158.png ├── Feature159.png ├── Feature160.png ├── Feature161.png ├── Feature162.png ├── Feature163.png ├── Feature164.png ├── Feature165.png ├── Feature166.png ├── Feature167.png ├── Feature168.png ├── Feature169.png ├── Feature170.png ├── corr_feature_select.png ├── corr_modified.png ├── corr_pca.png ├── corr_raw.png ├── corr_y_modified.png ├── corr_y_pca.png ├── corr_y_raw.png ├── llicit2D.png ├── vif_modified.png └── vif_raw.png ├── modified_ML ├── Mlp.py ├── Mlp.txt ├── SVM.py ├── SVM.txt ├── degree_2_logistic_regression.py ├── degree_2_polynominal_regression.py ├── linear_regression.py ├── linear_regression.txt ├── logistic_regression.py ├── logistic_regression.txt ├── logistic_regressionCV.py ├── logistic_regressionCV.txt ├── random_forest.py ├── random_forest.txt ├── run_list.sh └── shared.py ├── orginizing_data_src ├── classify_data.py ├── connect_real_txs.py └── graph.py ├── pca_ML ├── Mlp.py ├── Mlp.txt ├── SVM.py ├── SVM.txt ├── corr_pca.txt ├── degree_2_logistic_regression.py ├── degree_2_logistic_regression.txt ├── degree_2_polynominal_regression.py ├── degree_2_polynominal_regression.txt ├── linear_regression.py ├── linear_regression.txt ├── logistic_regression.py ├── logistic_regression.txt ├── logistic_regressionCV.py ├── logistic_regressionCV.txt ├── random_forest.py ├── random_forest.txt ├── run_list.sh └── shared.py ├── raw_ML ├── Mlp.py ├── Mlp.txt ├── SVM.py ├── SVM.txt ├── degree_2_logistic_regression.py ├── degree_2_logistic_regression.txt ├── degree_2_polynominal_regression.py ├── degree_2_polynominal_regression.txt ├── linear_regression.py ├── linear_regression.txt ├── logistic_regression.py ├── logistic_regression.txt ├── logistic_regressionCV.py ├── logistic_regressionCV.txt ├── random_forest.py ├── random_forest.txt ├── run_list.sh └── shared.py ├── requirements.txt ├── start.sh └── stat ├── corr_modified.txt ├── corr_pca.txt ├── corr_raw.txt ├── hy_testing_modified.py ├── hy_testing_pca.py ├── hy_testing_raw.py 
├── linear_modified.txt ├── linear_pca.txt ├── linear_raw.txt ├── logistic_pca.txt ├── pca_plot.py ├── run_list.sh ├── time_series_modified.py ├── time_series_raw.py ├── vif_modified.py ├── vif_modified.txt ├── vif_raw.py └── vif_raw.txt /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .DS_Store 3 | .vscode/settings.json 4 | -------------------------------------------------------------------------------- /AML_final_poster.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/AML_final_poster.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Elliptic Dataset Analysis (Antimoney Laundry in Bitcoin) 2 | 3 | ## Table of content 4 | - [Introduction](#introduction) 5 | - [Dataset](#dataset) 6 | - [File tree](#file-tree) 7 | - [Learning Model](#learning-model) 8 | - [Usage](#usage) 9 | - [Requirements](#requirements) 10 | 11 | ## Introduction 12 | - Reproduce the experiment of the [paper](https://arxiv.org/abs/1908.02591) 13 | - Add new features and data analysis for the elliptic data set 14 | - Add new ML method to rerun the experiment 15 | - Deanonymized the transaction and add real bitcoin transaction data 16 | 17 | ## Dataset 18 | - Elliptic Data Set 19 | https://www.kaggle.com/ellipticco/elliptic-data-set 20 | 21 | - Deanonymized Transactions 22 | https://www.kaggle.com/alexbenzik/deanonymized-995-pct-of-elliptic-transactions 23 | 24 | ## File tree 25 | ```bash 26 | . 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""MLP classifier on the correlation-selected Elliptic features.

Trains a single-hidden-layer (50 units) MLPClassifier on the feature subset
returned by shared.prepare_data and writes precision/recall/F1 for the
illicit class (label 1), the full classification report, and the confusion
matrix to Mlp.txt.
"""
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix, classification_report
from shared import prepare_data

if __name__ == "__main__":
    # prepare_data(1) skips the side-effect outputs (deleted.txt / heatmap).
    X_train, X_test, Y_train, Y_test = prepare_data(1)
    clf = MLPClassifier(hidden_layer_sizes=(50, ), random_state=0, max_iter=300).fit(X_train, Y_train)
    _predict = clf.predict(X_test)
    cm = confusion_matrix(Y_test, _predict, labels=clf.classes_)
    precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict)
    # Use a context manager so the report file is closed even if a write fails
    # (the original opened/closed the handle manually).
    with open('./Mlp.txt', 'w') as fi:
        # Index [1] selects the metrics of the positive (illicit) class.
        fi.write(f"precision = {precision[1]}\n")
        fi.write(f"recall = {recall[1]}\n")
        fi.write(f"F1 = {f1[1]}\n")
        fi.write(classification_report(Y_test, _predict))
        fi.write(f"{cm}")
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Degree-2 polynomial logistic regression on the correlation-selected features.

Expands the selected features with all degree-2 polynomial terms, fits a
LogisticRegression classifier, and writes precision/recall/F1 for the illicit
class (label 1), the full classification report, and the confusion matrix to
degree_2_logistic_regression.txt.
"""
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix, classification_report
from shared import prepare_data

if __name__ == "__main__":

    X_train, X_test, Y_train, Y_test = prepare_data(1)
    trans = PolynomialFeatures(degree=2)
    X_train = trans.fit_transform(X_train)
    # BUG FIX: use transform(), not fit_transform(), on the test split so the
    # expansion fitted on the training data is reused rather than re-fitted on
    # the test data.
    X_test = trans.transform(X_test)
    clf = LogisticRegression().fit(X_train, Y_train)
    _predict = clf.predict(X_test)
    precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict)
    cm = confusion_matrix(Y_test, _predict, labels=clf.classes_)
    # Context manager guarantees the report file is closed on any write error.
    with open('./degree_2_logistic_regression.txt', 'w') as fi:
        # Index [1] selects the metrics of the positive (illicit) class.
        fi.write(f"precision = {precision[1]}\n")
        fi.write(f"recall = {recall[1]}\n")
        fi.write(f"F1 = {f1[1]}\n")
        fi.write(classification_report(Y_test, _predict))
        fi.write(f"{cm}")
/feature_select_corr_ML/degree_2_polynominal_regression.txt: -------------------------------------------------------------------------------- 1 | precision = 0.7396449704142012 2 | recall = 0.5649717514124294 3 | F1 = 0.6406149903907751 4 | precision recall f1-score support 5 | 6 | 0 0.97 0.99 0.98 12929 7 | 1 0.74 0.56 0.64 885 8 | 9 | accuracy 0.96 13814 10 | macro avg 0.86 0.78 0.81 13814 11 | weighted avg 0.96 0.96 0.96 13814 12 | [[12753 176] 13 | [ 385 500]] -------------------------------------------------------------------------------- /feature_select_corr_ML/deleted.txt: -------------------------------------------------------------------------------- 1 | 0 : False 2 | 1 : False 3 | 2 : False 4 | 3 : False 5 | 4 : False 6 | 5 : False 7 | 6 : True 8 | 7 : False 9 | 8 : False 10 | 9 : False 11 | 10 : False 12 | 11 : True 13 | 12 : False 14 | 13 : False 15 | 14 : True 16 | 15 : False 17 | 16 : False 18 | 17 : True 19 | 18 : False 20 | 19 : True 21 | 20 : False 22 | 21 : False 23 | 22 : False 24 | 23 : False 25 | 24 : False 26 | 25 : False 27 | 26 : False 28 | 27 : False 29 | 28 : False 30 | 29 : False 31 | 30 : False 32 | 31 : True 33 | 32 : False 34 | 33 : False 35 | 34 : False 36 | 35 : True 37 | 36 : False 38 | 37 : True 39 | 38 : False 40 | 39 : False 41 | 40 : False 42 | 41 : False 43 | 42 : False 44 | 43 : False 45 | 44 : False 46 | 45 : False 47 | 46 : False 48 | 47 : False 49 | 48 : False 50 | 49 : False 51 | 50 : False 52 | 51 : False 53 | 52 : True 54 | 53 : False 55 | 54 : False 56 | 55 : True 57 | 56 : False 58 | 57 : False 59 | 58 : False 60 | 59 : False 61 | 60 : False 62 | 61 : False 63 | 62 : False 64 | 63 : False 65 | 64 : False 66 | 65 : False 67 | 66 : False 68 | 67 : False 69 | 68 : False 70 | 69 : False 71 | 70 : False 72 | 71 : False 73 | 72 : False 74 | 73 : False 75 | 74 : False 76 | 75 : True 77 | 76 : False 78 | 77 : False 79 | 78 : False 80 | 79 : False 81 | 80 : False 82 | 81 : False 83 | 82 : False 84 | 83 : True 85 | 84 : False 86 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Ridge (linear) classifier baseline on the correlation-selected features.

NOTE: despite the file name, this fits sklearn's RidgeClassifier (linear
least-squares with L2 penalty on {-1, 1} targets), not a plain linear
regression. Results — precision/recall/F1 for the illicit class (label 1),
the classification report, and the confusion matrix — are written to
linear_regression.txt.
"""
import numpy as np
import pandas as pd
from sklearn.linear_model import RidgeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix, classification_report
from shared import prepare_data

if __name__ == "__main__":
    X_train, X_test, Y_train, Y_test = prepare_data(1)
    clf = RidgeClassifier().fit(X_train, Y_train)
    _predict = clf.predict(X_test)
    precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict)
    cm = confusion_matrix(Y_test, _predict, labels=clf.classes_)
    # Use a context manager so the report file is always closed
    # (the original opened/closed the handle manually).
    with open('./linear_regression.txt', 'w') as fi:
        # Index [1] selects the metrics of the positive (illicit) class.
        fi.write(f"precision = {precision[1]}\n")
        fi.write(f"recall = {recall[1]}\n")
        fi.write(f"F1 = {f1[1]}\n")
        fi.write(classification_report(Y_test, _predict))
        fi.write(f"{cm}")
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Cross-validated logistic regression on the correlation-selected features.

Fits LogisticRegressionCV (10-fold CV over the regularization path) and
writes precision/recall/F1 for the illicit class (label 1), the full
classification report, and the confusion matrix to logistic_regressionCV.txt.
"""
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegressionCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix, classification_report
from shared import prepare_data

if __name__ == "__main__":
    X_train, X_test, Y_train, Y_test = prepare_data(1)
    # max_iter raised to 2000 so the solver converges on this data;
    # random_state fixed for reproducible CV folds.
    clf = LogisticRegressionCV(cv=10, max_iter=2000, random_state=0).fit(X_train, Y_train)
    _predict = clf.predict(X_test)
    precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict)
    cm = confusion_matrix(Y_test, _predict, labels=clf.classes_)
    # Context manager guarantees the report file is closed on any write error.
    with open('./logistic_regressionCV.txt', 'w') as fi:
        # Index [1] selects the metrics of the positive (illicit) class.
        fi.write(f"precision = {precision[1]}\n")
        fi.write(f"recall = {recall[1]}\n")
        fi.write(f"F1 = {f1[1]}\n")
        fi.write(classification_report(Y_test, _predict))
        fi.write(f"{cm}")
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Shared data preparation for the correlation-based feature-selection models.

Loads the Elliptic dataset, keeps only labelled transactions, drops features
that are highly correlated with another, lower-target-correlation feature,
and returns a chronological train/test split.
"""
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import seaborn as sns
import matplotlib.pyplot as plt


def prepare_data(num):
    """Load, label, feature-select and split the Elliptic data.

    Parameters
    ----------
    num : int
        0 to also write the side-effect artifacts (deleted.txt listing which
        feature columns were kept, and the correlation heatmap PNG); any
        other value skips them.

    Returns
    -------
    X_train, X_test, Y_train, Y_test : pandas objects
        70/30 chronological split (shuffle=False) of the selected features
        and the binary target (1 = illicit class '1', 0 = class '2').
    """
    features = pd.read_csv('../../elliptic_bitcoin_dataset/full_data.csv', header=None)
    classes = pd.read_csv('../../elliptic_bitcoin_dataset/elliptic_txs_classes.csv')
    feature = [str(i) for i in range(171)]
    features.columns = ["txId", "time_step"] + feature
    features = pd.merge(features, classes, left_on="txId", right_on="txId", how='left')
    features['class'] = features['class'].apply(lambda x: '0' if x == "unknown" else x)
    # Columns 165-170 are the appended (non-original) features; drop rows
    # where any of them is missing. One dropna call replaces six identical
    # per-column calls — same resulting rows.
    features.dropna(subset=[str(c) for c in range(165, 171)], inplace=True)
    # Keep only the labelled transactions; '1' = illicit, '2' = licit.
    data = features[(features['class'] == '1') | (features['class'] == '2')]
    X = data[feature]
    Y = data['class'].apply(lambda x: 0 if x == '2' else 1)
    # shuffle=False keeps the chronological ordering for a temporal split.
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.3, random_state=0, shuffle=False)

    # Per-feature correlation with the target, used to break ties below.
    cor = []
    for i in range(171):
        cor.append(X_train[f'{i}'].corr(Y_train))

    # Greedy de-correlation: for each pair with feature-feature correlation
    # >= 0.35, drop the member whose |correlation with the target| is lower.
    # The statement order of this double loop matters (already-dropped
    # columns are skipped), so the logic is kept exactly as before.
    corr = X_train.corr()
    columns = np.full((corr.shape[0],), True, dtype=bool)
    for i in range(corr.shape[0]):
        for j in range(corr.shape[0]):
            if corr.iloc[i, j] >= 0.35:
                if columns[j] and abs(cor[j]) < abs(cor[i]):
                    if columns[i]:
                        columns[j] = False
                elif columns[i] and abs(cor[j]) > abs(cor[i]):
                    if columns[j]:
                        columns[i] = False
    if num == 0:
        # Record which columns survived the selection (True = kept).
        with open('./deleted.txt', 'w') as fi:
            for i in range(len(columns)):
                fi.write(f'{i} : {columns[i]}\n')

    selected_columns = X.columns[columns]
    X = X[selected_columns]
    # Re-split with the reduced feature set (same parameters => same rows).
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.3, random_state=0, shuffle=False)
    if num == 0:
        cmap = sns.diverging_palette(0, 230, 90, 60, as_cmap=True)
        # BUG FIX: seaborn's heatmap takes a boolean `cbar`; colorbar options
        # belong in `cbar_kws`. The original passed the dict as `cbar`, so
        # shrink/ticks were silently ignored.
        sns.heatmap(X.corr(), cmap=cmap,
                    cbar_kws={'shrink': 0.4, 'ticks': [-1, -0.5, 0, 0.5, 1]})
        plt.savefig('../image/corr_feature_select.png')
        plt.close()
    return X_train, X_test, Y_train, Y_test
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""SVM classifier on the PCA-selected Elliptic features.

Fits an RBF-kernel SVC (sklearn defaults) and writes precision/recall/F1 for
the illicit class (label 1), the full classification report, and the
confusion matrix to SVM.txt.

NOTE(review): unlike the corr-feature variant, this script fits SVC directly
without a StandardScaler pipeline — presumably because the PCA features are
already on a comparable scale; confirm before "fixing".
"""
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from shared import prepare_data

if __name__ == "__main__":
    X_train, X_test, Y_train, Y_test = prepare_data(1)
    clf = SVC().fit(X_train, Y_train)
    _predict = clf.predict(X_test)
    cm = confusion_matrix(Y_test, _predict, labels=clf.classes_)
    precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict)
    # Use a context manager so the report file is always closed
    # (the original opened/closed the handle manually).
    with open('./SVM.txt', 'w') as fi:
        # Index [1] selects the metrics of the positive (illicit) class.
        fi.write(f"precision = {precision[1]}\n")
        fi.write(f"recall = {recall[1]}\n")
        fi.write(f"F1 = {f1[1]}\n")
        fi.write(classification_report(Y_test, _predict))
        fi.write(f"{cm}")
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Logistic regression on degree-2 polynomial expansion of the PCA features."""
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix, classification_report
from shared import prepare_data

if __name__ == "__main__":
    # prepare_data(1) -> standardised, PCA-projected train/test split.
    X_train, X_test, Y_train, Y_test = prepare_data(1)
    trans = PolynomialFeatures(degree=2)
    X_train = trans.fit_transform(X_train)
    # Fit on train only; transform (not fit_transform) the test set so the
    # transformer is never refit on held-out data.
    X_test = trans.transform(X_test)
    clf = LogisticRegression().fit(X_train, Y_train)
    _predict = clf.predict(X_test)
    precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict)
    cm = confusion_matrix(Y_test, _predict, labels=clf.classes_)
    # Index [1] = metrics for the positive ("illicit") class; 'with'
    # closes the file even if a write raises.
    with open('./degree_2_logistic_regression.txt', 'w') as fi:
        fi.write(f"precision = {precision[1]}\n")
        fi.write(f"recall = {recall[1]}\n")
        fi.write(f"F1 = {f1[1]}\n")
        fi.write(classification_report(Y_test, _predict))
        fi.write(f"{cm}")
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Ridge classifier on degree-2 polynomial expansion of the PCA features."""
import numpy as np
import pandas as pd
from sklearn.linear_model import RidgeClassifier
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix, classification_report
from shared import prepare_data

if __name__ == "__main__":
    # prepare_data(1) -> standardised, PCA-projected train/test split.
    X_train, X_test, Y_train, Y_test = prepare_data(1)
    trans = PolynomialFeatures(degree=2)
    X_train = trans.fit_transform(X_train)
    # Fit on train only; transform (not fit_transform) the test set so the
    # transformer is never refit on held-out data.
    X_test = trans.transform(X_test)
    clf = RidgeClassifier().fit(X_train, Y_train)
    _predict = clf.predict(X_test)
    precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict)
    cm = confusion_matrix(Y_test, _predict, labels=clf.classes_)
    # Index [1] = metrics for the positive ("illicit") class; 'with'
    # closes the file even if a write raises.
    with open('./degree_2_polynominal_regression.txt', 'w') as fi:
        fi.write(f"precision = {precision[1]}\n")
        fi.write(f"recall = {recall[1]}\n")
        fi.write(f"F1 = {f1[1]}\n")
        fi.write(classification_report(Y_test, _predict))
        fi.write(f"{cm}")
{precision[1]}\n") 19 | fi.write(f"recall = {recall[1]}\n") 20 | fi.write(f"F1 = {f1[1]}\n") 21 | fi.write(classification_report(Y_test, _predict)) 22 | fi.write(f"{cm}") 23 | fi.close() -------------------------------------------------------------------------------- /feature_select_pca_ML/linear_regression.txt: -------------------------------------------------------------------------------- 1 | precision = 0.5161290322580645 2 | recall = 0.1807909604519774 3 | F1 = 0.26778242677824265 4 | precision recall f1-score support 5 | 6 | 0 0.95 0.99 0.97 12929 7 | 1 0.52 0.18 0.27 885 8 | 9 | accuracy 0.94 13814 10 | macro avg 0.73 0.58 0.62 13814 11 | weighted avg 0.92 0.94 0.92 13814 12 | [[12779 150] 13 | [ 725 160]] -------------------------------------------------------------------------------- /feature_select_pca_ML/logistic_regression.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import numpy as np 4 | import pandas as pd 5 | from sklearn.linear_model import LogisticRegression 6 | from sklearn.model_selection import train_test_split 7 | from sklearn.metrics import precision_recall_fscore_support 8 | from sklearn.metrics import confusion_matrix, classification_report 9 | from shared import prepare_data 10 | 11 | if __name__ == "__main__": 12 | X_train, X_test, Y_train, Y_test = prepare_data(1) 13 | clf = LogisticRegression().fit(X_train, Y_train) 14 | _predict = clf.predict(X_test) 15 | precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict) 16 | cm = confusion_matrix(Y_test, _predict, labels=clf.classes_) 17 | fi = open('./logistic_regression.txt', 'w') 18 | fi.write(f"precision = {precision[1]}\n") 19 | fi.write(f"recall = {recall[1]}\n") 20 | fi.write(f"F1 = {f1[1]}\n") 21 | fi.write(classification_report(Y_test, _predict)) 22 | fi.write(f"{cm}") 23 | fi.close() 24 | -------------------------------------------------------------------------------- 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Cross-validated logistic regression on the PCA-selected Elliptic features."""
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegressionCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix, classification_report
from shared import prepare_data

if __name__ == "__main__":
    # prepare_data(1) -> standardised, PCA-projected train/test split.
    X_train, X_test, Y_train, Y_test = prepare_data(1)
    # 10-fold CV over the regularisation path; fixed seed for repeatability.
    clf = LogisticRegressionCV(cv=10, max_iter=2000,
                               random_state=0).fit(X_train, Y_train)
    _predict = clf.predict(X_test)
    precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict)
    cm = confusion_matrix(Y_test, _predict, labels=clf.classes_)
    # Index [1] = metrics for the positive ("illicit") class; 'with'
    # closes the file even if a write raises.
    with open('./logistic_regressionCV.txt', 'w') as fi:
        fi.write(f"precision = {precision[1]}\n")
        fi.write(f"recall = {recall[1]}\n")
        fi.write(f"F1 = {f1[1]}\n")
        fi.write(classification_report(Y_test, _predict))
        fi.write(f"{cm}")
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Random forest on the (non-PCA, num=0) Elliptic feature split."""
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix, classification_report
from shared import prepare_data

if __name__ == "__main__":
    # NOTE(review): this script passes 0 where the others pass 1 —
    # presumably a different feature-selection mode; confirm intent.
    X_train, X_test, Y_train, Y_test = prepare_data(0)
    clf = RandomForestClassifier(n_estimators=50, max_depth=100,
                                 random_state=0).fit(X_train, Y_train)
    _predict = clf.predict(X_test)
    cm = confusion_matrix(Y_test, _predict, labels=clf.classes_)
    precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict)
    # Index [1] = metrics for the positive ("illicit") class; 'with'
    # closes the file even if a write raises.
    with open('./random_forest.txt', 'w') as fi:
        fi.write(f"precision = {precision[1]}\n")
        fi.write(f"recall = {recall[1]}\n")
        fi.write(f"F1 = {f1[1]}\n")
        fi.write(classification_report(Y_test, _predict))
        fi.write(f"{cm}")
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import statsmodels.api as sm


def prepare_data(num):
    """Load the Elliptic dataset, standardise, PCA-project and split it.

    Parameters
    ----------
    num : int
        Accepted for interface compatibility; not used in this variant.
        # NOTE(review): callers pass 0 or 1 — confirm it should select a mode.

    Returns
    -------
    tuple
        (X_train, X_test, Y_train, Y_test); X arrays are PCA-projected
        with the listed components removed, Y is 1 for illicit, 0 for licit.
    """
    features = pd.read_csv('../../elliptic_bitcoin_dataset/full_data.csv', header=None)
    classes = pd.read_csv('../../elliptic_bitcoin_dataset/elliptic_txs_classes.csv')
    feature = [str(i) for i in range(171)]
    features.columns = ["txId", "time_step"] + feature
    features = pd.merge(features, classes, left_on="txId", right_on="txId", how='left')
    features['class'] = features['class'].apply(lambda x: '0' if x == "unknown" else x)
    # Drop rows missing any of the aggregated-feature columns 165-170
    # (one call replaces six sequential dropna passes — same rows removed).
    features.dropna(subset=[str(i) for i in range(165, 171)], inplace=True)
    # Keep only labelled transactions: '1' = illicit, '2' = licit.
    data = features[(features['class'] == '1') | (features['class'] == '2')]
    X = data[feature]
    Y = data['class'].apply(lambda x: 0 if x == '2' else 1)
    std = StandardScaler()
    X = std.fit_transform(X)
    # shuffle=False preserves the temporal ordering of transactions,
    # so the test set is strictly later in time than the training set.
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.3, random_state=0, shuffle=False)
    pca = PCA(n_components=X.shape[1] - 1)
    pca.fit(X_train)  # fit on the training split only to avoid leakage
    X_train = pca.transform(X_train)
    X_test = pca.transform(X_test)
    # Principal components dropped by a previous feature-selection pass.
    del_lst = [30, 37, 44, 49, 50, 51, 59, 60, 65, 66, 72, 75, 76, 77, 78,
               85, 86, 87, 89, 90, 91, 92, 93, 96, 97, 99, 100, 101, 102,
               104, 106, 107, 110, 112, 115, 119, 120, 122, 124, 125, 127,
               128, 129, 131, 132, 134, 135, 136, 137, 138, 139, 140, 142,
               143, 144, 145, 146, 147, 149, 150, 151, 152, 153, 154, 156,
               157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
               169]
    X_train = np.delete(X_train, del_lst, axis=1)
    X_test = np.delete(X_test, del_lst, axis=1)
    return X_train, X_test, Y_train, Y_test
https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature003.png -------------------------------------------------------------------------------- /image/Feature004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature004.png -------------------------------------------------------------------------------- /image/Feature005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature005.png -------------------------------------------------------------------------------- /image/Feature006.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature006.png -------------------------------------------------------------------------------- /image/Feature007.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature007.png -------------------------------------------------------------------------------- /image/Feature008.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature008.png -------------------------------------------------------------------------------- /image/Feature009.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature009.png -------------------------------------------------------------------------------- /image/Feature010.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature010.png -------------------------------------------------------------------------------- /image/Feature011.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature011.png -------------------------------------------------------------------------------- /image/Feature012.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature012.png -------------------------------------------------------------------------------- /image/Feature013.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature013.png -------------------------------------------------------------------------------- /image/Feature014.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature014.png -------------------------------------------------------------------------------- /image/Feature015.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature015.png -------------------------------------------------------------------------------- /image/Feature016.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature016.png -------------------------------------------------------------------------------- /image/Feature017.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature017.png -------------------------------------------------------------------------------- /image/Feature018.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature018.png -------------------------------------------------------------------------------- /image/Feature019.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature019.png -------------------------------------------------------------------------------- /image/Feature020.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature020.png -------------------------------------------------------------------------------- /image/Feature021.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature021.png -------------------------------------------------------------------------------- /image/Feature022.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature022.png -------------------------------------------------------------------------------- /image/Feature023.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature023.png -------------------------------------------------------------------------------- /image/Feature024.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature024.png -------------------------------------------------------------------------------- /image/Feature025.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature025.png -------------------------------------------------------------------------------- /image/Feature026.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature026.png -------------------------------------------------------------------------------- /image/Feature027.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature027.png -------------------------------------------------------------------------------- /image/Feature028.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature028.png -------------------------------------------------------------------------------- /image/Feature029.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature029.png -------------------------------------------------------------------------------- /image/Feature030.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature030.png -------------------------------------------------------------------------------- /image/Feature031.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature031.png -------------------------------------------------------------------------------- /image/Feature032.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature032.png -------------------------------------------------------------------------------- /image/Feature033.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature033.png -------------------------------------------------------------------------------- /image/Feature034.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature034.png -------------------------------------------------------------------------------- /image/Feature035.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature035.png -------------------------------------------------------------------------------- /image/Feature036.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature036.png -------------------------------------------------------------------------------- /image/Feature037.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature037.png -------------------------------------------------------------------------------- /image/Feature038.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature038.png -------------------------------------------------------------------------------- /image/Feature039.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature039.png -------------------------------------------------------------------------------- /image/Feature040.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature040.png -------------------------------------------------------------------------------- /image/Feature041.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature041.png -------------------------------------------------------------------------------- /image/Feature042.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature042.png -------------------------------------------------------------------------------- /image/Feature043.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature043.png -------------------------------------------------------------------------------- /image/Feature044.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature044.png -------------------------------------------------------------------------------- /image/Feature045.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature045.png -------------------------------------------------------------------------------- /image/Feature046.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature046.png -------------------------------------------------------------------------------- /image/Feature047.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature047.png -------------------------------------------------------------------------------- /image/Feature048.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature048.png -------------------------------------------------------------------------------- /image/Feature049.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature049.png -------------------------------------------------------------------------------- /image/Feature050.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature050.png -------------------------------------------------------------------------------- /image/Feature051.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature051.png -------------------------------------------------------------------------------- /image/Feature052.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature052.png -------------------------------------------------------------------------------- /image/Feature053.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature053.png -------------------------------------------------------------------------------- /image/Feature054.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature054.png -------------------------------------------------------------------------------- /image/Feature055.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature055.png -------------------------------------------------------------------------------- /image/Feature056.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature056.png -------------------------------------------------------------------------------- /image/Feature057.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature057.png -------------------------------------------------------------------------------- /image/Feature058.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature058.png -------------------------------------------------------------------------------- /image/Feature059.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature059.png -------------------------------------------------------------------------------- /image/Feature060.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature060.png -------------------------------------------------------------------------------- /image/Feature061.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature061.png -------------------------------------------------------------------------------- /image/Feature062.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature062.png -------------------------------------------------------------------------------- /image/Feature063.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature063.png -------------------------------------------------------------------------------- /image/Feature064.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature064.png -------------------------------------------------------------------------------- /image/Feature065.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature065.png -------------------------------------------------------------------------------- /image/Feature066.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature066.png -------------------------------------------------------------------------------- /image/Feature067.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature067.png -------------------------------------------------------------------------------- /image/Feature068.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature068.png -------------------------------------------------------------------------------- /image/Feature069.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature069.png -------------------------------------------------------------------------------- /image/Feature070.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature070.png -------------------------------------------------------------------------------- /image/Feature071.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature071.png -------------------------------------------------------------------------------- /image/Feature072.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature072.png -------------------------------------------------------------------------------- /image/Feature073.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature073.png -------------------------------------------------------------------------------- /image/Feature074.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature074.png -------------------------------------------------------------------------------- /image/Feature075.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature075.png -------------------------------------------------------------------------------- /image/Feature076.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature076.png -------------------------------------------------------------------------------- /image/Feature077.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature077.png -------------------------------------------------------------------------------- /image/Feature078.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature078.png -------------------------------------------------------------------------------- /image/Feature079.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature079.png -------------------------------------------------------------------------------- /image/Feature080.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature080.png -------------------------------------------------------------------------------- /image/Feature081.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature081.png -------------------------------------------------------------------------------- /image/Feature082.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature082.png -------------------------------------------------------------------------------- /image/Feature083.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature083.png -------------------------------------------------------------------------------- /image/Feature084.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature084.png -------------------------------------------------------------------------------- /image/Feature085.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature085.png -------------------------------------------------------------------------------- /image/Feature086.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature086.png -------------------------------------------------------------------------------- /image/Feature087.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature087.png -------------------------------------------------------------------------------- /image/Feature088.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature088.png -------------------------------------------------------------------------------- /image/Feature089.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature089.png -------------------------------------------------------------------------------- /image/Feature090.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature090.png -------------------------------------------------------------------------------- /image/Feature091.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature091.png -------------------------------------------------------------------------------- /image/Feature092.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature092.png -------------------------------------------------------------------------------- /image/Feature093.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature093.png -------------------------------------------------------------------------------- /image/Feature094.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature094.png -------------------------------------------------------------------------------- /image/Feature095.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature095.png -------------------------------------------------------------------------------- /image/Feature096.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature096.png -------------------------------------------------------------------------------- /image/Feature097.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature097.png -------------------------------------------------------------------------------- /image/Feature098.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature098.png -------------------------------------------------------------------------------- /image/Feature099.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature099.png -------------------------------------------------------------------------------- /image/Feature100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature100.png -------------------------------------------------------------------------------- /image/Feature101.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature101.png -------------------------------------------------------------------------------- /image/Feature102.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature102.png -------------------------------------------------------------------------------- /image/Feature103.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature103.png -------------------------------------------------------------------------------- /image/Feature104.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature104.png -------------------------------------------------------------------------------- /image/Feature105.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature105.png -------------------------------------------------------------------------------- /image/Feature106.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature106.png -------------------------------------------------------------------------------- /image/Feature107.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature107.png -------------------------------------------------------------------------------- /image/Feature108.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature108.png -------------------------------------------------------------------------------- /image/Feature109.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature109.png -------------------------------------------------------------------------------- /image/Feature110.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature110.png -------------------------------------------------------------------------------- /image/Feature111.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature111.png -------------------------------------------------------------------------------- /image/Feature112.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature112.png -------------------------------------------------------------------------------- /image/Feature113.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature113.png -------------------------------------------------------------------------------- /image/Feature114.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature114.png -------------------------------------------------------------------------------- /image/Feature115.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature115.png -------------------------------------------------------------------------------- /image/Feature116.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature116.png -------------------------------------------------------------------------------- /image/Feature117.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature117.png -------------------------------------------------------------------------------- /image/Feature118.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature118.png -------------------------------------------------------------------------------- /image/Feature119.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature119.png -------------------------------------------------------------------------------- /image/Feature120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature120.png -------------------------------------------------------------------------------- /image/Feature121.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature121.png -------------------------------------------------------------------------------- /image/Feature122.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature122.png -------------------------------------------------------------------------------- /image/Feature123.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature123.png -------------------------------------------------------------------------------- /image/Feature124.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature124.png -------------------------------------------------------------------------------- /image/Feature125.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature125.png -------------------------------------------------------------------------------- /image/Feature126.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature126.png -------------------------------------------------------------------------------- /image/Feature127.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature127.png -------------------------------------------------------------------------------- /image/Feature128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature128.png -------------------------------------------------------------------------------- /image/Feature129.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature129.png -------------------------------------------------------------------------------- /image/Feature130.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature130.png -------------------------------------------------------------------------------- /image/Feature131.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature131.png -------------------------------------------------------------------------------- /image/Feature132.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature132.png -------------------------------------------------------------------------------- /image/Feature133.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature133.png -------------------------------------------------------------------------------- /image/Feature134.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature134.png -------------------------------------------------------------------------------- /image/Feature135.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature135.png -------------------------------------------------------------------------------- /image/Feature136.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature136.png -------------------------------------------------------------------------------- /image/Feature137.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature137.png -------------------------------------------------------------------------------- /image/Feature138.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature138.png -------------------------------------------------------------------------------- /image/Feature139.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature139.png -------------------------------------------------------------------------------- /image/Feature140.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature140.png -------------------------------------------------------------------------------- /image/Feature141.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature141.png -------------------------------------------------------------------------------- /image/Feature142.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature142.png -------------------------------------------------------------------------------- /image/Feature143.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature143.png -------------------------------------------------------------------------------- /image/Feature144.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature144.png -------------------------------------------------------------------------------- /image/Feature145.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature145.png -------------------------------------------------------------------------------- /image/Feature146.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature146.png -------------------------------------------------------------------------------- /image/Feature147.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature147.png -------------------------------------------------------------------------------- /image/Feature148.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature148.png -------------------------------------------------------------------------------- /image/Feature149.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature149.png -------------------------------------------------------------------------------- /image/Feature150.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature150.png -------------------------------------------------------------------------------- /image/Feature151.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature151.png -------------------------------------------------------------------------------- /image/Feature152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature152.png -------------------------------------------------------------------------------- /image/Feature153.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature153.png -------------------------------------------------------------------------------- /image/Feature154.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature154.png -------------------------------------------------------------------------------- /image/Feature155.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature155.png -------------------------------------------------------------------------------- /image/Feature156.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature156.png -------------------------------------------------------------------------------- /image/Feature157.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature157.png -------------------------------------------------------------------------------- /image/Feature158.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature158.png -------------------------------------------------------------------------------- /image/Feature159.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature159.png -------------------------------------------------------------------------------- /image/Feature160.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature160.png -------------------------------------------------------------------------------- /image/Feature161.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature161.png -------------------------------------------------------------------------------- /image/Feature162.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature162.png -------------------------------------------------------------------------------- /image/Feature163.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature163.png -------------------------------------------------------------------------------- /image/Feature164.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature164.png -------------------------------------------------------------------------------- /image/Feature165.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature165.png -------------------------------------------------------------------------------- /image/Feature166.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature166.png -------------------------------------------------------------------------------- /image/Feature167.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature167.png -------------------------------------------------------------------------------- /image/Feature168.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature168.png -------------------------------------------------------------------------------- /image/Feature169.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature169.png -------------------------------------------------------------------------------- /image/Feature170.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/Feature170.png -------------------------------------------------------------------------------- /image/corr_feature_select.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/corr_feature_select.png -------------------------------------------------------------------------------- /image/corr_modified.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/corr_modified.png -------------------------------------------------------------------------------- /image/corr_pca.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/corr_pca.png -------------------------------------------------------------------------------- /image/corr_raw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/corr_raw.png -------------------------------------------------------------------------------- /image/corr_y_modified.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/corr_y_modified.png -------------------------------------------------------------------------------- /image/corr_y_pca.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/corr_y_pca.png -------------------------------------------------------------------------------- /image/corr_y_raw.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/corr_y_raw.png -------------------------------------------------------------------------------- /image/llicit2D.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/llicit2D.png -------------------------------------------------------------------------------- /image/vif_modified.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/vif_modified.png -------------------------------------------------------------------------------- /image/vif_raw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexLee1999/Elliptic-Dataset-Analysis/14f3efb17cc62059528c9c6ea54493ddb281e2e9/image/vif_raw.png -------------------------------------------------------------------------------- /modified_ML/Mlp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import numpy as np 4 | import pandas as pd 5 | from sklearn.neural_network import MLPClassifier 6 | from sklearn.model_selection import train_test_split 7 | from sklearn.metrics import precision_recall_fscore_support 8 | from sklearn.metrics import confusion_matrix, classification_report 9 | from shared import prepare_data 10 | 11 | if __name__ == "__main__": 12 | X_train, X_test, Y_train, Y_test = prepare_data() 13 | clf = MLPClassifier(hidden_layer_sizes=(50, ), random_state=0, max_iter=300).fit(X_train, Y_train) 14 | _predict = clf.predict(X_test) 15 | cm = confusion_matrix(Y_test, _predict, labels=clf.classes_) 16 | precision, recall, f1, _ = 
precision_recall_fscore_support(Y_test, _predict) 17 | fi = open('./Mlp.txt', 'w') 18 | fi.write(f"precision = {precision[1]}\n") 19 | fi.write(f"recall = {recall[1]}\n") 20 | fi.write(f"F1 = {f1[1]}\n") 21 | fi.write(classification_report(Y_test, _predict)) 22 | fi.write(f"{cm}") 23 | fi.close() 24 | -------------------------------------------------------------------------------- /modified_ML/Mlp.txt: -------------------------------------------------------------------------------- 1 | precision = 0.0 2 | recall = 0.0 3 | F1 = 0.0 4 | precision recall f1-score support 5 | 6 | 0 0.94 1.00 0.97 12929 7 | 1 0.00 0.00 0.00 885 8 | 9 | accuracy 0.94 13814 10 | macro avg 0.47 0.50 0.48 13814 11 | weighted avg 0.88 0.94 0.90 13814 12 | [[12929 0] 13 | [ 885 0]] -------------------------------------------------------------------------------- /modified_ML/SVM.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import numpy as np 4 | import pandas as pd 5 | from sklearn.model_selection import train_test_split 6 | from sklearn.metrics import precision_recall_fscore_support 7 | from sklearn.metrics import confusion_matrix, classification_report 8 | from sklearn.pipeline import make_pipeline 9 | from sklearn.preprocessing import StandardScaler 10 | from sklearn.svm import SVC 11 | from shared import prepare_data 12 | 13 | if __name__ == "__main__": 14 | 15 | X_train, X_test, Y_train, Y_test = prepare_data() 16 | 17 | clf = make_pipeline(StandardScaler(), SVC()).fit(X_train,Y_train) 18 | _predict = clf.predict(X_test) 19 | cm = confusion_matrix(Y_test, _predict, labels=clf.classes_) 20 | precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict) 21 | fi = open('./SVM.txt', 'w') 22 | fi.write(f"precision = {precision[1]}\n") 23 | fi.write(f"recall = {recall[1]}\n") 24 | fi.write(f"F1 = {f1[1]}\n") 25 | fi.write(classification_report(Y_test, _predict)) 26 | 
fi.write(f"{cm}") 27 | fi.close() -------------------------------------------------------------------------------- /modified_ML/SVM.txt: -------------------------------------------------------------------------------- 1 | precision = 0.8878842676311031 2 | recall = 0.5548022598870056 3 | F1 = 0.6828929068150209 4 | precision recall f1-score support 5 | 6 | 0 0.97 1.00 0.98 12929 7 | 1 0.89 0.55 0.68 885 8 | 9 | accuracy 0.97 13814 10 | macro avg 0.93 0.78 0.83 13814 11 | weighted avg 0.97 0.97 0.96 13814 12 | [[12867 62] 13 | [ 394 491]] -------------------------------------------------------------------------------- /modified_ML/degree_2_logistic_regression.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import numpy as np 4 | import pandas as pd 5 | from sklearn.linear_model import LogisticRegression 6 | from sklearn.preprocessing import PolynomialFeatures 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.metrics import precision_recall_fscore_support 9 | from sklearn.metrics import confusion_matrix, classification_report 10 | from shared import prepare_data 11 | 12 | if __name__ == "__main__": 13 | 14 | X_train, X_test, Y_train, Y_test = prepare_data() 15 | trans = PolynomialFeatures(degree=2) 16 | X_train = trans.fit_transform(X_train) 17 | X_test = trans.fit_transform(X_test) 18 | clf = LogisticRegression().fit(X_train, Y_train) 19 | _predict = clf.predict(X_test) 20 | precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict) 21 | cm = confusion_matrix(Y_test, _predict, labels=clf.classes_) 22 | fi = open('./degree_2_logistic_regression.txt', 'w') 23 | fi.write(f"precision = {precision[1]}\n") 24 | fi.write(f"recall = {recall[1]}\n") 25 | fi.write(f"F1 = {f1[1]}\n") 26 | fi.write(classification_report(Y_test, _predict)) 27 | fi.write(f"{cm}") 28 | fi.close() 
-------------------------------------------------------------------------------- /modified_ML/degree_2_polynominal_regression.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import numpy as np 4 | import pandas as pd 5 | from sklearn.linear_model import RidgeClassifier 6 | from sklearn.preprocessing import PolynomialFeatures 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.metrics import precision_recall_fscore_support 9 | from sklearn.metrics import confusion_matrix, classification_report 10 | from shared import prepare_data 11 | 12 | if __name__ == "__main__": 13 | 14 | X_train, X_test, Y_train, Y_test = prepare_data() 15 | trans = PolynomialFeatures(degree=2) 16 | X_train = trans.fit_transform(X_train) 17 | X_test = trans.fit_transform(X_test) 18 | clf = RidgeClassifier().fit(X_train, Y_train) 19 | _predict = clf.predict(X_test) 20 | precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict) 21 | cm = confusion_matrix(Y_test, _predict, labels=clf.classes_) 22 | fi = open('./degree_2_polynominal_regression.txt', 'w') 23 | fi.write(f"precision = {precision[1]}\n") 24 | fi.write(f"recall = {recall[1]}\n") 25 | fi.write(f"F1 = {f1[1]}\n") 26 | fi.write(classification_report(Y_test, _predict)) 27 | fi.write(f"{cm}") 28 | fi.close() -------------------------------------------------------------------------------- /modified_ML/linear_regression.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import numpy as np 4 | import pandas as pd 5 | from sklearn.linear_model import RidgeClassifier 6 | from sklearn.model_selection import train_test_split 7 | from sklearn.metrics import precision_recall_fscore_support 8 | from sklearn.metrics import confusion_matrix, classification_report 9 | from shared import prepare_data 10 | 11 | if __name__ == 
"__main__": 12 | X_train, X_test, Y_train, Y_test = prepare_data() 13 | clf = RidgeClassifier().fit(X_train, Y_train) 14 | _predict = clf.predict(X_test) 15 | precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict) 16 | cm = confusion_matrix(Y_test, _predict, labels=clf.classes_) 17 | fi = open('./linear_regression.txt', 'w') 18 | fi.write(f"precision = {precision[1]}\n") 19 | fi.write(f"recall = {recall[1]}\n") 20 | fi.write(f"F1 = {f1[1]}\n") 21 | fi.write(classification_report(Y_test, _predict)) 22 | fi.write(f"{cm}") 23 | fi.close() -------------------------------------------------------------------------------- /modified_ML/linear_regression.txt: -------------------------------------------------------------------------------- 1 | precision = 0.5197368421052632 2 | recall = 0.17853107344632768 3 | F1 = 0.2657695542472666 4 | precision recall f1-score support 5 | 6 | 0 0.95 0.99 0.97 12929 7 | 1 0.52 0.18 0.27 885 8 | 9 | accuracy 0.94 13814 10 | macro avg 0.73 0.58 0.62 13814 11 | weighted avg 0.92 0.94 0.92 13814 12 | [[12783 146] 13 | [ 727 158]] -------------------------------------------------------------------------------- /modified_ML/logistic_regression.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import numpy as np 4 | import pandas as pd 5 | from sklearn.linear_model import LogisticRegression 6 | from sklearn.model_selection import train_test_split 7 | from sklearn.metrics import precision_recall_fscore_support 8 | from sklearn.metrics import confusion_matrix, classification_report 9 | from shared import prepare_data 10 | 11 | if __name__ == "__main__": 12 | X_train, X_test, Y_train, Y_test = prepare_data() 13 | clf = LogisticRegression().fit(X_train, Y_train) 14 | _predict = clf.predict(X_test) 15 | precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict) 16 | cm = confusion_matrix(Y_test, _predict, 
labels=clf.classes_) 17 | fi = open('./logistic_regression.txt', 'w') 18 | fi.write(f"precision = {precision[1]}\n") 19 | fi.write(f"recall = {recall[1]}\n") 20 | fi.write(f"F1 = {f1[1]}\n") 21 | fi.write(classification_report(Y_test, _predict)) 22 | fi.write(f"{cm}") 23 | fi.close() 24 | -------------------------------------------------------------------------------- /modified_ML/logistic_regression.txt: -------------------------------------------------------------------------------- 1 | precision = 0.0 2 | recall = 0.0 3 | F1 = 0.0 4 | precision recall f1-score support 5 | 6 | 0 0.94 1.00 0.97 12929 7 | 1 0.00 0.00 0.00 885 8 | 9 | accuracy 0.94 13814 10 | macro avg 0.47 0.50 0.48 13814 11 | weighted avg 0.88 0.94 0.90 13814 12 | [[12929 0] 13 | [ 885 0]] -------------------------------------------------------------------------------- /modified_ML/logistic_regressionCV.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import numpy as np 4 | import pandas as pd 5 | from sklearn.linear_model import LogisticRegressionCV 6 | from sklearn.model_selection import train_test_split 7 | from sklearn.metrics import precision_recall_fscore_support 8 | from sklearn.metrics import confusion_matrix, classification_report 9 | from shared import prepare_data 10 | 11 | if __name__ == "__main__": 12 | X_train, X_test, Y_train, Y_test = prepare_data() 13 | clf = LogisticRegressionCV(cv=10, max_iter=2000, random_state=0).fit(X_train, Y_train) 14 | _predict = clf.predict(X_test) 15 | precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict) 16 | cm = confusion_matrix(Y_test, _predict, labels=clf.classes_) 17 | fi = open('./logistic_regressionCV.txt', 'w') 18 | fi.write(f"precision = {precision[1]}\n") 19 | fi.write(f"recall = {recall[1]}\n") 20 | fi.write(f"F1 = {f1[1]}\n") 21 | fi.write(classification_report(Y_test, _predict)) 22 | fi.write(f"{cm}") 23 | fi.close() 
-------------------------------------------------------------------------------- /modified_ML/logistic_regressionCV.txt: -------------------------------------------------------------------------------- 1 | precision = 0.0 2 | recall = 0.0 3 | F1 = 0.0 4 | precision recall f1-score support 5 | 6 | 0 0.94 1.00 0.97 12929 7 | 1 0.00 0.00 0.00 885 8 | 9 | accuracy 0.94 13814 10 | macro avg 0.47 0.50 0.48 13814 11 | weighted avg 0.88 0.94 0.90 13814 12 | [[12929 0] 13 | [ 885 0]] -------------------------------------------------------------------------------- /modified_ML/random_forest.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import numpy as np 4 | import pandas as pd 5 | from sklearn.ensemble import RandomForestClassifier 6 | from sklearn.model_selection import train_test_split 7 | from sklearn.metrics import precision_recall_fscore_support 8 | from sklearn.metrics import confusion_matrix, classification_report 9 | from shared import prepare_data 10 | 11 | if __name__ == "__main__": 12 | X_train, X_test, Y_train, Y_test = prepare_data() 13 | clf = RandomForestClassifier(n_estimators=50, max_depth=100, random_state=0).fit(X_train, Y_train) 14 | _predict = clf.predict(X_test) 15 | cm = confusion_matrix(Y_test, _predict, labels=clf.classes_) 16 | precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict) 17 | fi = open('./random_forest.txt', 'w') 18 | fi.write(f"precision = {precision[1]}\n") 19 | fi.write(f"recall = {recall[1]}\n") 20 | fi.write(f"F1 = {f1[1]}\n") 21 | fi.write(classification_report(Y_test, _predict)) 22 | fi.write(f"{cm}") 23 | fi.close() -------------------------------------------------------------------------------- /modified_ML/random_forest.txt: -------------------------------------------------------------------------------- 1 | precision = 0.9847457627118644 2 | recall = 0.656497175141243 3 | F1 = 0.7877966101694917 4 | precision 
recall f1-score support 5 | 6 | 0 0.98 1.00 0.99 12929 7 | 1 0.98 0.66 0.79 885 8 | 9 | accuracy 0.98 13814 10 | macro avg 0.98 0.83 0.89 13814 11 | weighted avg 0.98 0.98 0.98 13814 12 | [[12920 9] 13 | [ 304 581]] -------------------------------------------------------------------------------- /modified_ML/run_list.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | ./random_forest.py 3 | ./Mlp.py 4 | ./logistic_regressionCV.py 5 | ./SVM.py 6 | ./logistic_regression.py 7 | ./linear_regression.py 8 | ./degree_2_polynominal_regression.py 9 | ./degree_2_logistic_regression.py -------------------------------------------------------------------------------- /modified_ML/shared.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import numpy as np 4 | import pandas as pd 5 | from sklearn.model_selection import train_test_split 6 | 7 | 8 | 9 | def prepare_data(): 10 | features = pd.read_csv('../../elliptic_bitcoin_dataset/full_data.csv',header=None) 11 | classes = pd.read_csv('../../elliptic_bitcoin_dataset/elliptic_txs_classes.csv') 12 | feature = [str(i) for i in range(171)] 13 | features.columns = ["txId","time_step"] + feature 14 | features = pd.merge(features,classes,left_on="txId",right_on="txId",how='left') 15 | features['class'] = features['class'].apply(lambda x: '0' if x == "unknown" else x) 16 | features.dropna(subset=['165'], inplace=True) 17 | features.dropna(subset=['166'], inplace=True) 18 | features.dropna(subset=['167'], inplace=True) 19 | features.dropna(subset=['168'], inplace=True) 20 | features.dropna(subset=['169'], inplace=True) 21 | features.dropna(subset=['170'], inplace=True) 22 | data = features[(features['class']=='1') | (features['class']=='2')] 23 | X = data[feature] 24 | Y = data['class'] 25 | Y = Y.apply(lambda x: 0 if x == '2' else 1) 26 | X_train, X_test, Y_train, Y_test = 
train_test_split(X, Y, test_size=0.3,random_state=0,shuffle=False) 27 | return X_train, X_test, Y_train, Y_test -------------------------------------------------------------------------------- /orginizing_data_src/classify_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | ''' 4 | Usage : 5 | - Input feature file and class file 6 | - Output feature file 7 | - Change file name if you need 8 | ''' 9 | import numpy as np 10 | file_features = '../../elliptic_bitcoin_dataset/elliptic_txs_features.csv' 11 | file_class = '../../elliptic_bitcoin_dataset/elliptic_txs_classes.csv' 12 | licit_data_csv = "../../elliptic_bitcoin_dataset/licit_data.csv" 13 | illicit_data_csv = "../../elliptic_bitcoin_dataset/illicit_data.csv" 14 | unknown_data_csv = "../../elliptic_bitcoin_dataset/unknown_data.csv" 15 | 16 | def read_features_file(): 17 | with open(file_features) as f: 18 | FH = np.genfromtxt(f, delimiter=',', dtype='str') 19 | return FH 20 | 21 | 22 | def read_file_class(): 23 | with open(file_class) as f: 24 | FH = np.genfromtxt(f, delimiter=',', dtype='str') 25 | return FH 26 | 27 | 28 | def classify_data(features,classes): 29 | del_lst = [] 30 | ill_lst = [] 31 | for i in range(len(features)): 32 | if classes[i][1] == 'unknown': 33 | del_lst.append(i) 34 | elif classes[i][1] == '1': 35 | ill_lst.append(i) 36 | for i in range(len(del_lst)): 37 | features[del_lst[i]][0] = '-1' 38 | np.savetxt(unknown_data_csv, features[del_lst], delimiter=",",fmt="%s") 39 | np.savetxt(illicit_data_csv, features[ill_lst], delimiter=",",fmt="%s") 40 | tot = del_lst+ill_lst 41 | tot.sort() 42 | features = np.delete(features, tot, axis=0) 43 | np.savetxt(licit_data_csv, features, delimiter=",",fmt="%s") 44 | 45 | 46 | if __name__ == "__main__": 47 | results_class = read_file_class() 48 | results_features = read_features_file() 49 | results_class = results_class[1::] 50 | 
classify_data(results_features, results_class) -------------------------------------------------------------------------------- /orginizing_data_src/connect_real_txs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | ''' 4 | Usage : 5 | - Input Result file 6 | - Output Json key = id and value = real id 7 | - Change file name if you need 8 | ''' 9 | import json 10 | import numpy as np 11 | result_file = '../../elliptic_bitcoin_dataset/Result.csv' 12 | json_file = '../../elliptic_bitcoin_dataset/result.json' 13 | file_features = '../../elliptic_bitcoin_dataset/elliptic_txs_features.csv' 14 | output_csv = "../../elliptic_bitcoin_dataset/full_data.csv" 15 | 16 | 17 | def read_result_file(): 18 | with open(result_file) as f: 19 | FH = np.genfromtxt(f, delimiter=',', dtype='str') 20 | return FH 21 | 22 | def read_features_file(): 23 | with open(file_features) as f: 24 | FH = np.genfromtxt(f, delimiter=',', dtype='str') 25 | return FH 26 | 27 | 28 | def output_json(arr): 29 | dict1 = {arr[i][0] : arr[i][1] for i in range(len(arr))} 30 | with open(json_file, 'w') as fp: 31 | json.dump(dict1, fp) 32 | return dict1 33 | 34 | 35 | if __name__ == "__main__": 36 | result = read_result_file() 37 | features = read_features_file() 38 | a = np.empty((203769,6)) 39 | a[:] = np.NaN 40 | features = np.concatenate((features, a), 1) 41 | result = result[1:] 42 | 43 | dic = output_json(result) 44 | for i in range(len(features)): 45 | if features[i][0] in dic: 46 | with open(f'../../txs/{dic[features[i][0]]}.json') as f: 47 | data = json.load(f) 48 | if 'block_height' in data: 49 | features[i][-1] = data['block_height'] 50 | if 'weight' in data: 51 | features[i][-2] = data['weight'] 52 | if 'vin_sz' in data: 53 | features[i][-3] = data['vin_sz'] 54 | if 'vout_sz' in data: 55 | features[i][-4] = data['vout_sz'] 56 | if 'size' in data: 57 | features[i][-5] = data['size'] 58 | if 'out' in data: 59 | 
su = 0 60 | for d in data['out']: 61 | su += d['value'] 62 | features[i][-6] = su 63 | 64 | 65 | np.savetxt(output_csv, features, delimiter=",",fmt="%s") 66 | -------------------------------------------------------------------------------- /orginizing_data_src/graph.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | ''' 4 | Usage : 5 | - Input feature file, class file, edge list 6 | - Output Json file with key = id and value = type 7 | - Change file name if you need 8 | ''' 9 | import numpy as np 10 | import json 11 | 12 | 13 | file_features = '../../elliptic_bitcoin_dataset/elliptic_txs_features.csv' 14 | file_edge = '../../elliptic_bitcoin_dataset/elliptic_txs_edgelist.csv' 15 | file_class = '../../elliptic_bitcoin_dataset/elliptic_txs_classes.csv' 16 | json_file = '../../elliptic_bitcoin_dataset/class.json' 17 | graph_file = '../../elliptic_bitcoin_dataset/graph.json' 18 | output_csv = "../../elliptic_bitcoin_dataset/full_data.csv" 19 | 20 | 21 | 22 | def read_features_file(): 23 | with open(file_features) as f: 24 | FH = np.genfromtxt(f, delimiter=',', dtype='str') 25 | return FH 26 | 27 | 28 | def read_edge_file(): 29 | with open(file_edge) as f: 30 | FH = np.loadtxt(f, delimiter=',', skiprows=1, dtype='str') 31 | return FH 32 | 33 | 34 | def read_file_class(): 35 | with open(file_class) as f: 36 | FH = np.genfromtxt(f, delimiter=',', dtype='str') 37 | return FH 38 | 39 | 40 | def output_json(arr): 41 | dict1 = {arr[i][0] : arr[i][1] for i in range(len(arr))} 42 | with open(json_file, 'w') as fp: 43 | json.dump(dict1, fp) 44 | return dict1 45 | 46 | 47 | if __name__ == "__main__": 48 | results_features = read_features_file() 49 | results_class = read_file_class() 50 | results_edge = read_edge_file() 51 | results_class = results_class[1::] 52 | results_id = results_class[:, :-1] 53 | r = results_class[:,0] 54 | dict1 = output_json(results_class) 55 | dict2 = {} 56 | for 
i in r: 57 | dict2[f'{i}'] = {'in':[], 'out':[]} 58 | for i in range(len(results_edge)): 59 | dict2[results_edge[i][0]]['out'].append(results_edge[i][1]) 60 | dict2[results_edge[i][1]]['in'].append(results_edge[i][0]) 61 | with open(graph_file, 'w') as fp: 62 | json.dump(dict2, fp) 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /pca_ML/Mlp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import numpy as np 4 | import pandas as pd 5 | from sklearn.neural_network import MLPClassifier 6 | from sklearn.model_selection import train_test_split 7 | from sklearn.metrics import precision_recall_fscore_support 8 | from sklearn.metrics import confusion_matrix, classification_report 9 | from shared import prepare_data 10 | 11 | if __name__ == "__main__": 12 | X_train, X_test, Y_train, Y_test = prepare_data(1) 13 | clf = MLPClassifier(hidden_layer_sizes=(50, ), random_state=0, max_iter=300).fit(X_train, Y_train) 14 | _predict = clf.predict(X_test) 15 | cm = confusion_matrix(Y_test, _predict, labels=clf.classes_) 16 | precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict) 17 | fi = open('./Mlp.txt', 'w') 18 | fi.write(f"precision = {precision[1]}\n") 19 | fi.write(f"recall = {recall[1]}\n") 20 | fi.write(f"F1 = {f1[1]}\n") 21 | fi.write(classification_report(Y_test, _predict)) 22 | fi.write(f"{cm}") 23 | fi.close() 24 | 25 | -------------------------------------------------------------------------------- /pca_ML/Mlp.txt: -------------------------------------------------------------------------------- 1 | precision = 0.8106870229007633 2 | recall = 0.6 3 | F1 = 0.6896103896103896 4 | precision recall f1-score support 5 | 6 | 0 0.97 0.99 0.98 12929 7 | 1 0.81 0.60 0.69 885 8 | 9 | accuracy 0.97 13814 10 | macro avg 0.89 0.80 0.84 13814 11 | weighted avg 0.96 0.97 0.96 13814 12 | [[12805 124] 13 | [ 354 531]] 
-------------------------------------------------------------------------------- /pca_ML/SVM.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import numpy as np 4 | import pandas as pd 5 | from sklearn.model_selection import train_test_split 6 | from sklearn.metrics import precision_recall_fscore_support 7 | from sklearn.metrics import confusion_matrix, classification_report 8 | from sklearn.pipeline import make_pipeline 9 | from sklearn.preprocessing import StandardScaler 10 | from sklearn.svm import SVC 11 | from shared import prepare_data 12 | 13 | if __name__ == "__main__": 14 | 15 | X_train, X_test, Y_train, Y_test = prepare_data(1) 16 | 17 | clf = SVC().fit(X_train,Y_train) 18 | _predict = clf.predict(X_test) 19 | cm = confusion_matrix(Y_test, _predict, labels=clf.classes_) 20 | precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict) 21 | fi = open('./SVM.txt', 'w') 22 | fi.write(f"precision = {precision[1]}\n") 23 | fi.write(f"recall = {recall[1]}\n") 24 | fi.write(f"F1 = {f1[1]}\n") 25 | fi.write(classification_report(Y_test, _predict)) 26 | fi.write(f"{cm}") 27 | fi.close() -------------------------------------------------------------------------------- /pca_ML/SVM.txt: -------------------------------------------------------------------------------- 1 | precision = 0.8862876254180602 2 | recall = 0.5988700564971752 3 | F1 = 0.7147673634524612 4 | precision recall f1-score support 5 | 6 | 0 0.97 0.99 0.98 12929 7 | 1 0.89 0.60 0.71 885 8 | 9 | accuracy 0.97 13814 10 | macro avg 0.93 0.80 0.85 13814 11 | weighted avg 0.97 0.97 0.97 13814 12 | [[12861 68] 13 | [ 355 530]] -------------------------------------------------------------------------------- /pca_ML/degree_2_logistic_regression.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import numpy as 
np 4 | import pandas as pd 5 | from sklearn.linear_model import LogisticRegression 6 | from sklearn.preprocessing import PolynomialFeatures 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.metrics import precision_recall_fscore_support 9 | from sklearn.metrics import confusion_matrix, classification_report 10 | from shared import prepare_data 11 | 12 | if __name__ == "__main__": 13 | 14 | X_train, X_test, Y_train, Y_test = prepare_data(1) 15 | trans = PolynomialFeatures(degree=2) 16 | X_train = trans.fit_transform(X_train) 17 | X_test = trans.fit_transform(X_test) 18 | clf = LogisticRegression().fit(X_train, Y_train) 19 | _predict = clf.predict(X_test) 20 | precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict) 21 | cm = confusion_matrix(Y_test, _predict, labels=clf.classes_) 22 | fi = open('./degree_2_logistic_regression.txt', 'w') 23 | fi.write(f"precision = {precision[1]}\n") 24 | fi.write(f"recall = {recall[1]}\n") 25 | fi.write(f"F1 = {f1[1]}\n") 26 | fi.write(classification_report(Y_test, _predict)) 27 | fi.write(f"{cm}") 28 | fi.close() -------------------------------------------------------------------------------- /pca_ML/degree_2_logistic_regression.txt: -------------------------------------------------------------------------------- 1 | precision = 0.7938931297709924 2 | recall = 0.47005649717514125 3 | F1 = 0.5904897090134847 4 | precision recall f1-score support 5 | 6 | 0 0.96 0.99 0.98 12929 7 | 1 0.79 0.47 0.59 885 8 | 9 | accuracy 0.96 13814 10 | macro avg 0.88 0.73 0.78 13814 11 | weighted avg 0.95 0.96 0.95 13814 12 | [[12821 108] 13 | [ 469 416]] -------------------------------------------------------------------------------- /pca_ML/degree_2_polynominal_regression.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import numpy as np 4 | import pandas as pd 5 | from sklearn.linear_model import RidgeClassifier 
6 | from sklearn.preprocessing import PolynomialFeatures 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.metrics import precision_recall_fscore_support 9 | from sklearn.metrics import confusion_matrix, classification_report 10 | from shared import prepare_data 11 | 12 | if __name__ == "__main__": 13 | 14 | X_train, X_test, Y_train, Y_test = prepare_data(1) 15 | trans = PolynomialFeatures(degree=2) 16 | X_train = trans.fit_transform(X_train) 17 | X_test = trans.fit_transform(X_test) 18 | clf = RidgeClassifier().fit(X_train, Y_train) 19 | _predict = clf.predict(X_test) 20 | precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict) 21 | cm = confusion_matrix(Y_test, _predict, labels=clf.classes_) 22 | fi = open('./degree_2_polynominal_regression.txt', 'w') 23 | fi.write(f"precision = {precision[1]}\n") 24 | fi.write(f"recall = {recall[1]}\n") 25 | fi.write(f"F1 = {f1[1]}\n") 26 | fi.write(classification_report(Y_test, _predict)) 27 | fi.write(f"{cm}") 28 | fi.close() -------------------------------------------------------------------------------- /pca_ML/degree_2_polynominal_regression.txt: -------------------------------------------------------------------------------- 1 | precision = 0.4048852701702443 2 | recall = 0.6180790960451977 3 | F1 = 0.48926654740608233 4 | precision recall f1-score support 5 | 6 | 0 0.97 0.94 0.96 12929 7 | 1 0.40 0.62 0.49 885 8 | 9 | accuracy 0.92 13814 10 | macro avg 0.69 0.78 0.72 13814 11 | weighted avg 0.94 0.92 0.93 13814 12 | [[12125 804] 13 | [ 338 547]] -------------------------------------------------------------------------------- /pca_ML/linear_regression.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import numpy as np 4 | import pandas as pd 5 | from sklearn.linear_model import RidgeClassifier 6 | from sklearn.model_selection import train_test_split 7 | from sklearn.metrics import 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""RidgeClassifier baseline ("linear regression") on the PCA-reduced features.

Writes precision/recall/F1 for the illicit class (label 1) plus the
confusion matrix to linear_regression.txt.
"""
import numpy as np
import pandas as pd
from sklearn.linear_model import RidgeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix, classification_report
from shared import prepare_data

if __name__ == "__main__":
    X_train, X_test, Y_train, Y_test = prepare_data(1)
    clf = RidgeClassifier().fit(X_train, Y_train)
    _predict = clf.predict(X_test)
    precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict)
    cm = confusion_matrix(Y_test, _predict, labels=clf.classes_)
    # Context manager closes the report even if a write raises
    # (the original's fi.close() was skipped on exceptions).
    with open('./linear_regression.txt', 'w') as fi:
        fi.write(f"precision = {precision[1]}\n")
        fi.write(f"recall = {recall[1]}\n")
        fi.write(f"F1 = {f1[1]}\n")
        fi.write(classification_report(Y_test, _predict))
        fi.write(f"{cm}")
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Logistic regression on the PCA-reduced features.

Writes precision/recall/F1 for the illicit class (label 1) plus the
confusion matrix to logistic_regression.txt.
"""
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix, classification_report
from shared import prepare_data

if __name__ == "__main__":
    X_train, X_test, Y_train, Y_test = prepare_data(1)
    clf = LogisticRegression().fit(X_train, Y_train)
    _predict = clf.predict(X_test)
    precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict)
    cm = confusion_matrix(Y_test, _predict, labels=clf.classes_)
    # Context manager closes the report even if a write raises.
    with open('./logistic_regression.txt', 'w') as fi:
        fi.write(f"precision = {precision[1]}\n")
        fi.write(f"recall = {recall[1]}\n")
        fi.write(f"F1 = {f1[1]}\n")
        fi.write(classification_report(Y_test, _predict))
        fi.write(f"{cm}")
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Random forest on the PCA-pipeline dataset.

Writes precision/recall/F1 for the illicit class (label 1) plus the
confusion matrix to random_forest.txt.
"""
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix, classification_report
from shared import prepare_data

if __name__ == "__main__":
    # NOTE(review): this script passes 0 where the others pass 1; the argument
    # is currently unused by prepare_data — confirm intent.
    X_train, X_test, Y_train, Y_test = prepare_data(0)
    # Fixed seed keeps runs reproducible.
    clf = RandomForestClassifier(n_estimators=50, max_depth=100, random_state=0).fit(X_train, Y_train)
    _predict = clf.predict(X_test)
    cm = confusion_matrix(Y_test, _predict, labels=clf.classes_)
    precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict)
    # Context manager closes the report even if a write raises.
    with open('./random_forest.txt', 'w') as fi:
        fi.write(f"precision = {precision[1]}\n")
        fi.write(f"recall = {recall[1]}\n")
        fi.write(f"F1 = {f1[1]}\n")
        fi.write(classification_report(Y_test, _predict))
        fi.write(f"{cm}")
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Shared data loading for the pca_ML experiments."""
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA


def prepare_data(num):
    """Load the augmented elliptic dataset, scale it, and PCA-transform it.

    Parameters
    ----------
    num : int
        Kept for interface compatibility with the callers; currently unused.

    Returns
    -------
    X_train, X_test, Y_train, Y_test
        70/30 time-ordered split, standardized and projected onto
        n_features - 1 principal components. Y is 1 for illicit ('1')
        transactions and 0 otherwise.
    """
    features = pd.read_csv('../../elliptic_bitcoin_dataset/full_data.csv', header=None)
    classes = pd.read_csv('../../elliptic_bitcoin_dataset/elliptic_txs_classes.csv')
    feature = [str(i) for i in range(171)]
    features.columns = ["txId", "time_step"] + feature
    features = pd.merge(features, classes, left_on="txId", right_on="txId", how='left')
    features['class'] = features['class'].apply(lambda x: '0' if x == "unknown" else x)
    # Drop rows missing any of the six appended graph features (165-170);
    # one dropna replaces the original's six separate calls.
    features.dropna(subset=[str(i) for i in range(165, 171)], inplace=True)
    # Keep only labelled rows: '1' = illicit, '2' = licit.
    data = features[(features['class'] == '1') | (features['class'] == '2')]
    X = data[feature]
    Y = data['class'].apply(lambda x: 0 if x == '2' else 1)
    # shuffle=False preserves the temporal ordering of the time steps.
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.3, random_state=0, shuffle=False)
    # Fit the scaler on the training split only. The original called
    # fit_transform on the full matrix before splitting, leaking test-set
    # statistics into the training data.
    std = StandardScaler()
    X_train = std.fit_transform(X_train)
    X_test = std.transform(X_test)
    pca = PCA(n_components=X_train.shape[1] - 1)
    pca.fit(X_train)
    return pca.transform(X_train), pca.transform(X_test), Y_train, Y_test
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""MLP classifier on the raw elliptic feature set.

Writes precision/recall/F1 for the illicit class (label 1) plus the
confusion matrix to Mlp.txt.
"""
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix, classification_report
from shared import prepare_data

if __name__ == "__main__":
    X_train, X_test, Y_train, Y_test = prepare_data()
    # One hidden layer of 50 units; fixed seed for reproducible runs.
    clf = MLPClassifier(hidden_layer_sizes=(50,), random_state=0, max_iter=300).fit(X_train, Y_train)
    _predict = clf.predict(X_test)
    cm = confusion_matrix(Y_test, _predict, labels=clf.classes_)
    precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict)
    # Context manager closes the report even if a write raises.
    with open('./Mlp.txt', 'w') as fi:
        fi.write(f"precision = {precision[1]}\n")
        fi.write(f"recall = {recall[1]}\n")
        fi.write(f"F1 = {f1[1]}\n")
        fi.write(classification_report(Y_test, _predict))
        fi.write(f"{cm}")
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""SVC (RBF kernel) on the raw elliptic feature set.

Standardizes inside a pipeline, then writes precision/recall/F1 for the
illicit class (label 1) plus the confusion matrix to SVM.txt.
"""
import numpy as np
import pandas as pd
# NOTE(review): RandomForestClassifier and train_test_split are unused here;
# kept to avoid changing the file's import surface.
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from shared import prepare_data

if __name__ == "__main__":
    X_train, X_test, Y_train, Y_test = prepare_data()
    # Pipeline fits the scaler on training data and reuses it at predict time.
    clf = make_pipeline(StandardScaler(), SVC()).fit(X_train, Y_train)
    _predict = clf.predict(X_test)
    cm = confusion_matrix(Y_test, _predict, labels=clf.classes_)
    precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict)
    # Context manager closes the report even if a write raises.
    with open('./SVM.txt', 'w') as fi:
        fi.write(f"precision = {precision[1]}\n")
        fi.write(f"recall = {recall[1]}\n")
        fi.write(f"F1 = {f1[1]}\n")
        fi.write(classification_report(Y_test, _predict))
        fi.write(f"{cm}")
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Degree-2 polynomial logistic regression on the raw elliptic features.

Writes precision/recall/F1 for the illicit class (label 1) plus the
confusion matrix to degree_2_logistic_regression.txt.
"""
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix, classification_report
from shared import prepare_data

if __name__ == "__main__":
    X_train, X_test, Y_train, Y_test = prepare_data()
    trans = PolynomialFeatures(degree=2)
    X_train = trans.fit_transform(X_train)
    # transform (not fit_transform): never refit the transformer on test data.
    X_test = trans.transform(X_test)
    clf = LogisticRegression().fit(X_train, Y_train)
    _predict = clf.predict(X_test)
    precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict)
    cm = confusion_matrix(Y_test, _predict, labels=clf.classes_)
    # Context manager closes the report even if a write raises.
    with open('./degree_2_logistic_regression.txt', 'w') as fi:
        fi.write(f"precision = {precision[1]}\n")
        fi.write(f"recall = {recall[1]}\n")
        fi.write(f"F1 = {f1[1]}\n")
        fi.write(classification_report(Y_test, _predict))
        fi.write(f"{cm}")
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Degree-2 polynomial ridge classifier on the raw elliptic features.

Writes precision/recall/F1 for the illicit class (label 1) plus the
confusion matrix to degree_2_polynominal_regression.txt.
"""
import numpy as np
import pandas as pd
from sklearn.linear_model import RidgeClassifier
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix, classification_report
from shared import prepare_data

if __name__ == "__main__":
    X_train, X_test, Y_train, Y_test = prepare_data()
    trans = PolynomialFeatures(degree=2)
    X_train = trans.fit_transform(X_train)
    # transform (not fit_transform): never refit the transformer on test data.
    X_test = trans.transform(X_test)
    clf = RidgeClassifier().fit(X_train, Y_train)
    _predict = clf.predict(X_test)
    precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict)
    cm = confusion_matrix(Y_test, _predict, labels=clf.classes_)
    # Context manager closes the report even if a write raises.
    with open('./degree_2_polynominal_regression.txt', 'w') as fi:
        fi.write(f"precision = {precision[1]}\n")
        fi.write(f"recall = {recall[1]}\n")
        fi.write(f"F1 = {f1[1]}\n")
        fi.write(classification_report(Y_test, _predict))
        fi.write(f"{cm}")
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""RidgeClassifier baseline ("linear regression") on the raw elliptic features.

Writes precision/recall/F1 for the illicit class (label 1) plus the
confusion matrix to linear_regression.txt.
"""
import numpy as np
import pandas as pd
from sklearn.linear_model import RidgeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix, classification_report
from shared import prepare_data

if __name__ == "__main__":
    X_train, X_test, Y_train, Y_test = prepare_data()
    clf = RidgeClassifier().fit(X_train, Y_train)
    _predict = clf.predict(X_test)
    precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict)
    cm = confusion_matrix(Y_test, _predict, labels=clf.classes_)
    # Context manager closes the report even if a write raises.
    with open('./linear_regression.txt', 'w') as fi:
        fi.write(f"precision = {precision[1]}\n")
        fi.write(f"recall = {recall[1]}\n")
        fi.write(f"F1 = {f1[1]}\n")
        fi.write(classification_report(Y_test, _predict))
        fi.write(f"{cm}")
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Logistic regression on the raw elliptic features.

Writes precision/recall/F1 for the illicit class (label 1) plus the
confusion matrix to logistic_regression.txt.
"""
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix, classification_report
from shared import prepare_data


if __name__ == "__main__":
    X_train, X_test, Y_train, Y_test = prepare_data()
    clf = LogisticRegression().fit(X_train, Y_train)
    _predict = clf.predict(X_test)
    cm = confusion_matrix(Y_test, _predict, labels=clf.classes_)
    precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict)
    # Context manager closes the report even if a write raises.
    with open('./logistic_regression.txt', 'w') as fi:
        fi.write(f"precision = {precision[1]}\n")
        fi.write(f"recall = {recall[1]}\n")
        fi.write(f"F1 = {f1[1]}\n")
        fi.write(classification_report(Y_test, _predict))
        fi.write(f"{cm}")
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Cross-validated logistic regression on the raw elliptic features.

Writes precision/recall/F1 for the illicit class (label 1) plus the
confusion matrix to logistic_regressionCV.txt.
"""
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegressionCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix, classification_report
from shared import prepare_data

if __name__ == "__main__":
    X_train, X_test, Y_train, Y_test = prepare_data()
    # 10-fold CV over the regularization path; fixed seed for reproducibility.
    clf = LogisticRegressionCV(cv=10, max_iter=2000, random_state=0).fit(X_train, Y_train)
    _predict = clf.predict(X_test)
    cm = confusion_matrix(Y_test, _predict, labels=clf.classes_)
    precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict)
    # Context manager closes the report even if a write raises.
    with open('./logistic_regressionCV.txt', 'w') as fi:
        fi.write(f"precision = {precision[1]}\n")
        fi.write(f"recall = {recall[1]}\n")
        fi.write(f"F1 = {f1[1]}\n")
        fi.write(classification_report(Y_test, _predict))
        fi.write(f"{cm}")
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Random forest on the raw elliptic features.

Writes precision/recall/F1 for the illicit class (label 1) plus the
confusion matrix to random_forest.txt.
"""
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix, classification_report
from shared import prepare_data

if __name__ == "__main__":
    X_train, X_test, Y_train, Y_test = prepare_data()
    # Fixed seed keeps runs reproducible.
    clf = RandomForestClassifier(n_estimators=50, max_depth=100, random_state=0).fit(X_train, Y_train)
    _predict = clf.predict(X_test)
    cm = confusion_matrix(Y_test, _predict, labels=clf.classes_)
    precision, recall, f1, _ = precision_recall_fscore_support(Y_test, _predict)
    # Context manager closes the report even if a write raises.
    with open('./random_forest.txt', 'w') as fi:
        fi.write(f"precision = {precision[1]}\n")
        fi.write(f"recall = {recall[1]}\n")
        fi.write(f"F1 = {f1[1]}\n")
        fi.write(classification_report(Y_test, _predict))
        fi.write(f"{cm}")
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Shared data loading for the raw_ML experiments."""
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split


def prepare_data():
    """Load the raw elliptic dataset and return a 70/30 time-ordered split.

    Returns
    -------
    X_train, X_test, Y_train, Y_test
        Y is 1 for illicit ('1') transactions and 0 otherwise.
    """
    features = pd.read_csv('../../elliptic_bitcoin_dataset/elliptic_txs_features.csv', header=None)
    classes = pd.read_csv('../../elliptic_bitcoin_dataset/elliptic_txs_classes.csv')
    feature = [str(i) for i in range(165)]
    features.columns = ["txId", "time_step"] + feature
    features = pd.merge(features, classes, left_on="txId", right_on="txId", how='left')
    features['class'] = features['class'].apply(lambda x: '0' if x == "unknown" else x)
    # Keep only labelled rows: '1' = illicit, '2' = licit.
    data = features[(features['class'] == '1') | (features['class'] == '2')]
    X = data[feature]
    Y = data['class'].apply(lambda x: 0 if x == '2' else 1)
    # shuffle=False preserves the temporal ordering of the time steps.
    return train_test_split(X, Y, test_size=0.3, random_state=0, shuffle=False)
fi
case $1 in
    -all|-A)
        cd raw_ML/
        ./run_list.sh
        ##cd ../modified_ML
        ##./run_list.sh
        cd ../pca_ML
        ./run_list.sh
        cd ../feature_select_corr_ML
        ./run_list.sh
        cd ../feature_select_pca_ML/
        ./run_list.sh
        cd ../stat
        ./run_list.sh
        cd ..
        echo "Finished $1"
        ;;
    -raw|-R)
        cd raw_ML/
        ./run_list.sh
        cd ..
        echo "Finished $1"
        ;;
    -modified|-M)
        cd modified_ML/
        ./run_list.sh
        cd ..
        echo "Finished $1"
        ;;
    -pca|-P)
        cd pca_ML/
        ./run_list.sh
        cd ..
        echo "Finished $1"
        ;;
    # Fixed: the pattern was "pcaf" (no leading dash), so invoking the script
    # with "-pcaf" fell through to the Wrong Flag branch; only -PF worked.
    -pcaf|-PF)
        cd feature_select_pca_ML/
        ./run_list.sh
        cd ..
        echo "Finished $1"
        ;;
    -corrf|-CF)
        cd feature_select_corr_ML/
        ./run_list.sh
        cd ..
        echo "Finished $1"
        ;;
    -stat|-S)
        cd stat/
        ./run_list.sh
        cd ..
        echo "Finished $1"
        ;;
    *)
        echo "Wrong Flag"
        ;;
esac
else
    echo "Missing file"
fi
"unknown" else x) 18 | features.dropna(subset=['165'], inplace=True) 19 | features.dropna(subset=['166'], inplace=True) 20 | features.dropna(subset=['167'], inplace=True) 21 | features.dropna(subset=['168'], inplace=True) 22 | features.dropna(subset=['169'], inplace=True) 23 | features.dropna(subset=['170'], inplace=True) 24 | 25 | data = features[(features['class']=='1') | (features['class']=='2')] 26 | X = data[feature] 27 | Y = data['class'] 28 | Y = Y.apply(lambda x: 0 if x == '2' else 1) 29 | X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3,random_state=0,shuffle=False) 30 | X2 = sm.add_constant(X_train) 31 | est = sm.OLS(Y_train, X2) 32 | est2 = est.fit() 33 | print("Linear regression") 34 | print(est2.summary()) 35 | fi = open('./linear_modified.txt', 'w') 36 | fi.write(f"{est2.summary()}") 37 | fi.close() 38 | fi = open('./corr_modified.txt', 'w') 39 | fi.write(f"{X.corr().to_string()}") 40 | fi.close() 41 | cmap = sns.diverging_palette(0, 230, 90, 60, as_cmap=True) 42 | sns.heatmap(X.corr(), cmap=cmap, cbar={'shrink':0.4, 'ticks':[-1, -0.5, 0, 0.5, 1]}) 43 | plt.savefig('../image/corr_modified.png') 44 | plt.close() 45 | 46 | cor = [] 47 | for i in range(171): 48 | x = X[f'{i}'].corr(Y) 49 | cor.append(x) 50 | plt.plot(cor) 51 | plt.savefig("../image/corr_y_modified.png") 52 | 53 | -------------------------------------------------------------------------------- /stat/hy_testing_pca.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import pandas as pd 4 | import numpy as np 5 | from sklearn.linear_model import LinearRegression 6 | import statsmodels.api as sm 7 | from scipy import stats 8 | import matplotlib.pyplot as plt 9 | import seaborn as sns 10 | from sklearn.preprocessing import StandardScaler 11 | from sklearn.decomposition import PCA 12 | from sklearn.model_selection import train_test_split 13 | features = 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""OLS/Logit hypothesis testing and correlation plots on PCA components."""
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split

features = pd.read_csv('../../elliptic_bitcoin_dataset/full_data.csv', header=None)
classes = pd.read_csv('../../elliptic_bitcoin_dataset/elliptic_txs_classes.csv')
feature = [str(i) for i in range(171)]
features.columns = ["txId", "time_step"] + feature
features = pd.merge(features, classes, left_on="txId", right_on="txId", how='left')
features['class'] = features['class'].apply(lambda x: '0' if x == "unknown" else x)
# One dropna over the six appended columns (165-170) replaces six calls.
features.dropna(subset=[str(i) for i in range(165, 171)], inplace=True)

data = features[(features['class'] == '1') | (features['class'] == '2')]
X = data[feature]
Y = data['class'].apply(lambda x: 0 if x == '2' else 1)
std = StandardScaler()
X = std.fit_transform(X)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0, shuffle=False)
pca = PCA(n_components=X.shape[1] - 1)
pca.fit(X_train)
X_train = pca.transform(X_train)
X = pca.transform(X)

X2 = sm.add_constant(X_train)
est2 = sm.OLS(Y_train, X2).fit()
print("Linear regression")
print(est2.summary())
with open('./linear_pca.txt', 'w') as fi:
    fi.write(f"{est2.summary()}")

log_reg = sm.Logit(Y_train, X2).fit(method='bfgs')
print("logistic regression")
print(log_reg.summary())
with open('./logistic_pca.txt', 'w') as fi:
    fi.write(f"{log_reg.summary()}")

with open('./corr_pca.txt', 'w') as fi:
    fi.write(f"{pd.DataFrame(X).corr().to_string()}")
cmap = sns.diverging_palette(0, 230, 90, 60, as_cmap=True)
# cbar_kws (not cbar) carries the colorbar options; cbar is a boolean flag.
sns.heatmap(pd.DataFrame(X).corr(), cmap=cmap, cbar_kws={'shrink': 0.4, 'ticks': [-1, -0.5, 0, 0.5, 1]})
plt.savefig('../image/corr_pca.png')
plt.close()

# Correlation of each PCA component with the label. X is rebuilt with a
# fresh RangeIndex while Y kept the original row labels, so Series.corr
# would silently pair mismatched rows; align by resetting Y's index.
X = pd.DataFrame(X, columns=[str(i) for i in range(170)])
Y_aligned = Y.reset_index(drop=True)
cor = [X[f'{i}'].corr(Y_aligned) for i in range(170)]
plt.plot(cor)
plt.savefig("../image/corr_y_pca.png")
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""OLS hypothesis testing and correlation plots for the raw dataset."""
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

features = pd.read_csv('../../elliptic_bitcoin_dataset/elliptic_txs_features.csv', header=None)
classes = pd.read_csv('../../elliptic_bitcoin_dataset/elliptic_txs_classes.csv')
feature = [str(i) for i in range(165)]
features.columns = ["txId", "time_step"] + feature
features = pd.merge(features, classes, left_on="txId", right_on="txId", how='left')
features['class'] = features['class'].apply(lambda x: '0' if x == "unknown" else x)
data = features[(features['class'] == '1') | (features['class'] == '2')]
X = data[feature]
Y = data['class'].apply(lambda x: 0 if x == '2' else 1)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0, shuffle=False)

X2 = sm.add_constant(X_train)
est2 = sm.OLS(Y_train, X2).fit()
print("Linear regression")
print(est2.summary())
with open('./linear_raw.txt', 'w') as fi:
    fi.write(f"{est2.summary()}")
with open('./corr_raw.txt', 'w') as fi:
    fi.write(f"{X.corr().to_string()}")
cmap = sns.diverging_palette(0, 230, 90, 60, as_cmap=True)
# cbar_kws (not cbar) carries the colorbar options; cbar is a boolean flag.
sns.heatmap(X.corr(), cmap=cmap, cbar_kws={'shrink': 0.4, 'ticks': [-1, -0.5, 0, 0.5, 1]})
plt.savefig('../image/corr_raw.png')
plt.close()

# Per-feature correlation with the label.
cor = [X[f'{i}'].corr(Y) for i in range(165)]
plt.plot(cor)
plt.savefig("../image/corr_y_raw.png")
0, 0.5, 1]}) 36 | plt.savefig('../image/corr_raw.png') 37 | plt.close() 38 | 39 | cor = [] 40 | for i in range(165): 41 | x = X[f'{i}'].corr(Y) 42 | cor.append(x) 43 | plt.plot(cor) 44 | plt.savefig("../image/corr_y_raw.png") 45 | -------------------------------------------------------------------------------- /stat/linear_pca.txt: -------------------------------------------------------------------------------- 1 | OLS Regression Results 2 | ============================================================================== 3 | Dep. Variable: class R-squared: 0.247 4 | Model: OLS Adj. R-squared: 0.243 5 | Method: Least Squares F-statistic: 63.79 6 | Date: Mon, 18 Jan 2021 Prob (F-statistic): 0.00 7 | Time: 11:34:38 Log-Likelihood: -4158.4 8 | No. Observations: 32230 AIC: 8649. 9 | Df Residuals: 32064 BIC: 1.004e+04 10 | Df Model: 165 11 | Covariance Type: nonrobust 12 | ============================================================================== 13 | coef std err t P>|t| [0.025 0.975] 14 | ------------------------------------------------------------------------------ 15 | const 0.1136 0.002 73.864 0.000 0.111 0.117 16 | x1 -0.0137 0.000 -41.861 0.000 -0.014 -0.013 17 | x2 0.0039 0.000 8.016 0.000 0.003 0.005 18 | x3 -0.0063 0.001 -12.210 0.000 -0.007 -0.005 19 | x4 0.0026 0.001 4.757 0.000 0.002 0.004 20 | x5 0.0018 0.001 3.058 0.002 0.001 0.003 21 | x6 -0.0147 0.001 -23.651 0.000 -0.016 -0.013 22 | x7 -0.0082 0.001 -12.704 0.000 -0.009 -0.007 23 | x8 -0.0028 0.001 -3.988 0.000 -0.004 -0.001 24 | x9 -0.0016 0.001 -2.199 0.028 -0.003 -0.000 25 | x10 0.0128 0.001 17.633 0.000 0.011 0.014 26 | x11 0.0113 0.001 15.066 0.000 0.010 0.013 27 | x12 0.0033 0.001 4.277 0.000 0.002 0.005 28 | x13 -0.0067 0.001 -8.433 0.000 -0.008 -0.005 29 | x14 -0.0174 0.001 -21.356 0.000 -0.019 -0.016 30 | x15 -0.0127 0.001 -15.434 0.000 -0.014 -0.011 31 | x16 -0.0014 0.001 -1.665 0.096 -0.003 0.000 32 | x17 -0.0113 0.001 -13.133 0.000 -0.013 -0.010 33 | x18 0.0028 0.001 3.234 0.001 0.001 
0.005 34 | x19 0.0134 0.001 14.870 0.000 0.012 0.015 35 | x20 -0.0105 0.001 -11.072 0.000 -0.012 -0.009 36 | x21 -0.0074 0.001 -7.692 0.000 -0.009 -0.006 37 | x22 0.0139 0.001 14.186 0.000 0.012 0.016 38 | x23 0.0296 0.001 28.959 0.000 0.028 0.032 39 | x24 -0.0362 0.001 -34.955 0.000 -0.038 -0.034 40 | x25 0.0037 0.001 3.440 0.001 0.002 0.006 41 | x26 0.0139 0.001 12.660 0.000 0.012 0.016 42 | x27 0.0091 0.001 8.032 0.000 0.007 0.011 43 | x28 0.0109 0.001 9.237 0.000 0.009 0.013 44 | x29 -0.0180 0.001 -14.888 0.000 -0.020 -0.016 45 | x30 0.0029 0.001 2.358 0.018 0.000 0.005 46 | x31 -0.0009 0.001 -0.692 0.489 -0.003 0.002 47 | x32 -0.0091 0.001 -7.101 0.000 -0.012 -0.007 48 | x33 -0.0222 0.001 -17.259 0.000 -0.025 -0.020 49 | x34 -0.0105 0.001 -7.945 0.000 -0.013 -0.008 50 | x35 -0.0120 0.001 -9.024 0.000 -0.015 -0.009 51 | x36 0.0085 0.001 6.251 0.000 0.006 0.011 52 | x37 -0.0180 0.001 -13.073 0.000 -0.021 -0.015 53 | x38 0.0003 0.001 0.223 0.823 -0.003 0.003 54 | x39 -0.0077 0.001 -5.324 0.000 -0.011 -0.005 55 | x40 0.0066 0.001 4.506 0.000 0.004 0.009 56 | x41 0.0040 0.001 2.706 0.007 0.001 0.007 57 | x42 -0.0178 0.002 -11.842 0.000 -0.021 -0.015 58 | x43 0.0183 0.002 12.139 0.000 0.015 0.021 59 | x44 -0.0093 0.002 -5.986 0.000 -0.012 -0.006 60 | x45 0.0004 0.002 0.228 0.820 -0.003 0.003 61 | x46 0.0152 0.002 9.591 0.000 0.012 0.018 62 | x47 -0.0214 0.002 -13.192 0.000 -0.025 -0.018 63 | x48 -0.0039 0.002 -2.349 0.019 -0.007 -0.001 64 | x49 0.0085 0.002 5.060 0.000 0.005 0.012 65 | x50 -0.0001 0.002 -0.069 0.945 -0.003 0.003 66 | x51 -0.0004 0.002 -0.254 0.799 -0.004 0.003 67 | x52 -7.098e-05 0.002 -0.040 0.968 -0.004 0.003 68 | x53 -0.0218 0.002 -12.206 0.000 -0.025 -0.018 69 | x54 0.0045 0.002 2.469 0.014 0.001 0.008 70 | x55 0.0225 0.002 11.818 0.000 0.019 0.026 71 | x56 0.0040 0.002 2.099 0.036 0.000 0.008 72 | x57 0.0065 0.002 3.331 0.001 0.003 0.010 73 | x58 -0.0135 0.002 -6.808 0.000 -0.017 -0.010 74 | x59 -0.0064 0.002 -3.180 0.001 -0.010 -0.002 75 | x60 
-0.0010 0.002 -0.476 0.634 -0.005 0.003 76 | x61 -0.0023 0.002 -1.034 0.301 -0.007 0.002 77 | x62 0.0105 0.002 4.431 0.000 0.006 0.015 78 | x63 0.0160 0.002 6.625 0.000 0.011 0.021 79 | x64 0.0244 0.002 9.776 0.000 0.020 0.029 80 | x65 -0.0165 0.003 -6.430 0.000 -0.021 -0.011 81 | x66 0.0042 0.003 1.559 0.119 -0.001 0.009 82 | x67 0.0029 0.003 1.059 0.290 -0.002 0.008 83 | x68 0.0424 0.003 14.723 0.000 0.037 0.048 84 | x69 -0.0143 0.003 -4.879 0.000 -0.020 -0.009 85 | x70 0.0076 0.003 2.487 0.013 0.002 0.014 86 | x71 0.0337 0.003 10.612 0.000 0.027 0.040 87 | x72 -0.0188 0.003 -5.571 0.000 -0.025 -0.012 88 | x73 0.0003 0.004 0.064 0.949 -0.008 0.008 89 | x74 -0.0186 0.004 -4.379 0.000 -0.027 -0.010 90 | x75 0.0139 0.005 3.086 0.002 0.005 0.023 91 | x76 -0.0079 0.005 -1.566 0.117 -0.018 0.002 92 | x77 -0.0031 0.005 -0.576 0.565 -0.014 0.007 93 | x78 0.0003 0.006 0.055 0.956 -0.011 0.012 94 | x79 0.0011 0.006 0.183 0.854 -0.011 0.013 95 | x80 0.0360 0.006 5.986 0.000 0.024 0.048 96 | x81 -0.0225 0.007 -3.438 0.001 -0.035 -0.010 97 | x82 -0.0277 0.007 -3.849 0.000 -0.042 -0.014 98 | x83 0.0354 0.007 4.831 0.000 0.021 0.050 99 | x84 -0.1028 0.008 -12.936 0.000 -0.118 -0.087 100 | x85 0.0487 0.008 6.103 0.000 0.033 0.064 101 | x86 -0.0091 0.008 -1.086 0.278 -0.025 0.007 102 | x87 0.0137 0.009 1.537 0.124 -0.004 0.031 103 | x88 0.0116 0.009 1.264 0.206 -0.006 0.030 104 | x89 -0.0264 0.010 -2.749 0.006 -0.045 -0.008 105 | x90 0.0013 0.010 0.130 0.897 -0.018 0.021 106 | x91 0.0106 0.010 1.042 0.298 -0.009 0.031 107 | x92 0.0079 0.011 0.720 0.472 -0.014 0.029 108 | x93 0.0170 0.011 1.539 0.124 -0.005 0.039 109 | x94 0.0072 0.011 0.637 0.524 -0.015 0.030 110 | x95 -0.0288 0.011 -2.507 0.012 -0.051 -0.006 111 | x96 -0.0583 0.012 -5.035 0.000 -0.081 -0.036 112 | x97 0.0179 0.012 1.511 0.131 -0.005 0.041 113 | x98 0.0018 0.012 0.147 0.883 -0.022 0.025 114 | x99 0.0281 0.012 2.285 0.022 0.004 0.052 115 | x100 0.0116 0.013 0.917 0.359 -0.013 0.036 116 | x101 0.0033 0.013 0.257 
0.797 -0.022 0.029 117 | x102 0.0025 0.013 0.185 0.853 -0.024 0.028 118 | x103 -0.0111 0.014 -0.815 0.415 -0.038 0.016 119 | x104 0.1003 0.014 7.317 0.000 0.073 0.127 120 | x105 0.0012 0.015 0.086 0.932 -0.027 0.030 121 | x106 -0.0643 0.015 -4.301 0.000 -0.094 -0.035 122 | x107 0.0006 0.015 0.039 0.969 -0.029 0.031 123 | x108 0.0030 0.016 0.191 0.849 -0.028 0.034 124 | x109 0.0448 0.016 2.857 0.004 0.014 0.076 125 | x110 0.0701 0.016 4.439 0.000 0.039 0.101 126 | x111 0.0192 0.017 1.143 0.253 -0.014 0.052 127 | x112 -0.0909 0.018 -5.192 0.000 -0.125 -0.057 128 | x113 -0.0133 0.018 -0.731 0.465 -0.049 0.022 129 | x114 0.0675 0.018 3.656 0.000 0.031 0.104 130 | x115 0.0846 0.019 4.537 0.000 0.048 0.121 131 | x116 0.0015 0.019 0.079 0.937 -0.036 0.038 132 | x117 0.1053 0.019 5.468 0.000 0.068 0.143 133 | x118 -0.0654 0.019 -3.371 0.001 -0.103 -0.027 134 | x119 -0.1062 0.020 -5.438 0.000 -0.144 -0.068 135 | x120 -0.0080 0.020 -0.392 0.695 -0.048 0.032 136 | x121 -0.0093 0.021 -0.448 0.654 -0.050 0.031 137 | x122 -0.0958 0.021 -4.521 0.000 -0.137 -0.054 138 | x123 0.0138 0.022 0.639 0.523 -0.029 0.056 139 | x124 -0.0877 0.022 -4.017 0.000 -0.131 -0.045 140 | x125 0.0234 0.022 1.057 0.291 -0.020 0.067 141 | x126 0.0268 0.022 1.194 0.233 -0.017 0.071 142 | x127 -0.0907 0.023 -3.964 0.000 -0.136 -0.046 143 | x128 0.0237 0.023 1.009 0.313 -0.022 0.070 144 | x129 -0.0039 0.024 -0.158 0.875 -0.052 0.044 145 | x130 0.0237 0.025 0.944 0.345 -0.026 0.073 146 | x131 0.0678 0.026 2.613 0.009 0.017 0.119 147 | x132 -0.0139 0.028 -0.504 0.614 -0.068 0.040 148 | x133 0.0329 0.028 1.171 0.241 -0.022 0.088 149 | x134 -0.0973 0.028 -3.440 0.001 -0.153 -0.042 150 | x135 -0.0024 0.029 -0.083 0.933 -0.058 0.054 151 | x136 0.0054 0.031 0.172 0.864 -0.056 0.066 152 | x137 0.0104 0.036 0.289 0.773 -0.060 0.081 153 | x138 0.0130 0.040 0.322 0.747 -0.066 0.092 154 | x139 -0.0231 0.044 -0.529 0.597 -0.109 0.063 155 | x140 -0.0690 0.044 -1.558 0.119 -0.156 0.018 156 | x141 -0.0955 0.046 -2.094 
0.036 -0.185 -0.006 157 | x142 0.0300 0.048 0.628 0.530 -0.064 0.124 158 | x143 0.1232 0.051 2.394 0.017 0.022 0.224 159 | x144 0.0511 0.053 0.959 0.337 -0.053 0.156 160 | x145 0.0484 0.058 0.829 0.407 -0.066 0.163 161 | x146 -0.0176 0.064 -0.274 0.784 -0.143 0.108 162 | x147 0.1208 0.075 1.605 0.108 -0.027 0.268 163 | x148 -0.0253 0.088 -0.289 0.773 -0.197 0.147 164 | x149 -0.1761 0.100 -1.755 0.079 -0.373 0.021 165 | x150 -0.0810 0.118 -0.684 0.494 -0.313 0.151 166 | x151 -0.0080 0.165 -0.048 0.961 -0.332 0.316 167 | x152 0.1166 0.188 0.620 0.535 -0.252 0.485 168 | x153 0.1786 0.205 0.870 0.384 -0.224 0.581 169 | x154 -0.1993 0.215 -0.925 0.355 -0.621 0.223 170 | x155 0.1690 0.245 0.691 0.490 -0.311 0.649 171 | x156 -0.6549 0.331 -1.980 0.048 -1.303 -0.006 172 | x157 0.1312 0.443 0.296 0.767 -0.737 1.000 173 | x158 -0.1013 0.505 -0.200 0.841 -1.091 0.889 174 | x159 9.8369 8.058 1.221 0.222 -5.957 25.630 175 | x160 5.0025 13.181 0.380 0.704 -20.832 30.837 176 | x161 -22.2650 43.279 -0.514 0.607 -107.094 62.564 177 | x162 -43.8220 46.566 -0.941 0.347 -135.094 47.450 178 | x163 20.9626 222.509 0.094 0.925 -415.164 457.089 179 | x164 -12.9916 233.933 -0.056 0.956 -471.509 445.526 180 | x165 395.0666 614.098 0.643 0.520 -808.588 1598.721 181 | x166 -9.607e-09 1.53e-08 -0.629 0.529 -3.95e-08 2.03e-08 182 | x167 5.392e-09 8.55e-09 0.631 0.528 -1.14e-08 2.21e-08 183 | x168 -3.563e-09 6.01e-09 -0.592 0.554 -1.53e-08 8.22e-09 184 | x169 1.251e-08 1.96e-08 0.638 0.523 -2.59e-08 5.09e-08 185 | x170 -1.195e-09 2.03e-09 -0.588 0.556 -5.18e-09 2.79e-09 186 | ============================================================================== 187 | Omnibus: 7931.645 Durbin-Watson: 1.734 188 | Prob(Omnibus): 0.000 Jarque-Bera (JB): 16386.652 189 | Skew: 1.469 Prob(JB): 0.00 190 | Kurtosis: 4.889 Cond. No. 
1.51e+16 191 | ============================================================================== 192 | 193 | Notes: 194 | [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. 195 | [2] The smallest eigenvalue is 3.13e-27. This might indicate that there are 196 | strong multicollinearity problems or that the design matrix is singular. -------------------------------------------------------------------------------- /stat/linear_raw.txt: -------------------------------------------------------------------------------- 1 | OLS Regression Results 2 | ============================================================================== 3 | Dep. Variable: class R-squared: 0.246 4 | Model: OLS Adj. R-squared: 0.242 5 | Method: Least Squares F-statistic: 64.79 6 | Date: Mon, 18 Jan 2021 Prob (F-statistic): 0.00 7 | Time: 11:35:19 Log-Likelihood: -4100.7 8 | No. Observations: 32594 AIC: 8529. 9 | Df Residuals: 32430 BIC: 9906. 10 | Df Model: 163 11 | Covariance Type: nonrobust 12 | ============================================================================== 13 | coef std err t P>|t| [0.025 0.975] 14 | ------------------------------------------------------------------------------ 15 | const 0.2105 0.003 60.647 0.000 0.204 0.217 16 | 0 0.0769 0.031 2.474 0.013 0.016 0.138 17 | 1 0.0021 0.003 0.607 0.544 -0.005 0.009 18 | 2 -0.0275 0.003 -10.752 0.000 -0.033 -0.023 19 | 3 -0.0004 0.009 -0.043 0.966 -0.019 0.018 20 | 4 1.6861 6.149 0.274 0.784 -10.366 13.739 21 | 5 0.0026 0.009 0.283 0.777 -0.015 0.021 22 | 6 -0.0073 0.004 -1.633 0.102 -0.016 0.001 23 | 7 0.1382 0.254 0.544 0.586 -0.360 0.636 24 | 8 -0.0822 0.050 -1.636 0.102 -0.181 0.016 25 | 9 0.0090 0.016 0.567 0.571 -0.022 0.040 26 | 10 -0.1402 0.221 -0.634 0.526 -0.574 0.293 27 | 11 0.0056 0.005 1.034 0.301 -0.005 0.016 28 | 12 -0.0107 0.005 -1.967 0.049 -0.021 -3.73e-05 29 | 13 -1.6527 6.029 -0.274 0.784 -13.470 10.165 30 | 14 -0.1047 0.797 -0.131 0.896 -1.668 1.458 31 | 15 -0.0165 0.015 
-1.091 0.275 -0.046 0.013 32 | 16 0.0471 0.045 1.051 0.293 -0.041 0.135 33 | 17 -0.1110 0.104 -1.070 0.284 -0.314 0.092 34 | 18 0.0109 0.080 0.136 0.892 -0.146 0.168 35 | 19 0.0044 0.014 0.315 0.753 -0.023 0.032 36 | 20 0.0074 0.014 0.522 0.601 -0.020 0.035 37 | 21 72.9284 126.970 0.574 0.566 -175.937 321.794 38 | 22 -95.8578 149.506 -0.641 0.521 -388.896 197.180 39 | 23 60.9518 116.568 0.523 0.601 -167.526 289.429 40 | 24 0.0003 0.007 0.042 0.967 -0.014 0.014 41 | 25 -0.2315 0.120 -1.932 0.053 -0.466 0.003 42 | 26 0.2458 0.118 2.087 0.037 0.015 0.477 43 | 27 -72.9193 126.950 -0.574 0.566 -321.747 175.908 44 | 28 95.8622 149.496 0.641 0.521 -197.156 388.880 45 | 29 -60.9572 116.568 -0.523 0.601 -289.435 167.520 46 | 30 0.0002 0.007 0.031 0.975 -0.014 0.014 47 | 31 0.2359 0.120 1.967 0.049 0.001 0.471 48 | 32 -0.2357 0.118 -2.001 0.045 -0.467 -0.005 49 | 33 -0.1929 0.201 -0.958 0.338 -0.588 0.202 50 | 34 0.0563 0.081 0.696 0.486 -0.102 0.215 51 | 35 -0.0194 0.026 -0.749 0.454 -0.070 0.031 52 | 36 0.1216 0.166 0.732 0.464 -0.204 0.447 53 | 37 0.0065 0.007 0.956 0.339 -0.007 0.020 54 | 38 -0.0081 0.007 -1.192 0.233 -0.021 0.005 55 | 39 -0.0136 0.022 -0.603 0.546 -0.058 0.031 56 | 40 -0.0023 0.057 -0.039 0.969 -0.115 0.110 57 | 41 -0.0087 0.011 -0.765 0.444 -0.031 0.014 58 | 42 0.0277 0.043 0.647 0.517 -0.056 0.112 59 | 43 -0.0033 0.013 -0.260 0.795 -0.028 0.021 60 | 44 0.0082 0.013 0.655 0.513 -0.016 0.033 61 | 45 0.0909 0.058 1.570 0.116 -0.023 0.204 62 | 46 -0.0248 0.072 -0.346 0.730 -0.165 0.116 63 | 47 0.0268 0.012 2.190 0.029 0.003 0.051 64 | 48 -0.0734 0.057 -1.295 0.195 -0.185 0.038 65 | 49 0.0064 0.013 0.505 0.614 -0.019 0.031 66 | 50 -0.0076 0.013 -0.596 0.551 -0.032 0.017 67 | 51 -0.0566 0.009 -6.035 0.000 -0.075 -0.038 68 | 52 -0.0149 0.007 -2.080 0.038 -0.029 -0.001 69 | 53 -0.0251 0.004 -6.343 0.000 -0.033 -0.017 70 | 54 0.0273 0.010 2.687 0.007 0.007 0.047 71 | 55 -0.0003 0.006 -0.046 0.963 -0.011 0.011 72 | 56 -0.0103 0.006 -1.802 0.072 -0.022 0.001 73 
| 57 2.1277 21.063 0.101 0.920 -39.157 43.413 74 | 58 -12.8340 28.152 -0.456 0.648 -68.013 42.345 75 | 59 1.0675 76.719 0.014 0.989 -149.305 151.440 76 | 60 -0.0271 0.029 -0.949 0.343 -0.083 0.029 77 | 61 -0.0544 0.113 -0.480 0.631 -0.276 0.168 78 | 62 0.1489 0.111 1.346 0.178 -0.068 0.366 79 | 63 -2.1150 21.060 -0.100 0.920 -43.394 39.164 80 | 64 12.8465 28.154 0.456 0.648 -42.336 68.029 81 | 65 -1.0270 76.724 -0.013 0.989 -151.409 149.355 82 | 66 -0.0270 0.029 -0.946 0.344 -0.083 0.029 83 | 67 0.0568 0.113 0.503 0.615 -0.165 0.278 84 | 68 -0.1365 0.110 -1.237 0.216 -0.353 0.080 85 | 69 0.0113 0.009 1.214 0.225 -0.007 0.030 86 | 70 -0.0314 0.030 -1.048 0.295 -0.090 0.027 87 | 71 0.1239 0.107 1.154 0.249 -0.087 0.334 88 | 72 -0.0831 0.075 -1.110 0.267 -0.230 0.064 89 | 73 -0.0167 0.018 -0.935 0.350 -0.052 0.018 90 | 74 0.0048 0.018 0.268 0.789 -0.030 0.040 91 | 75 -0.0907 0.035 -2.622 0.009 -0.158 -0.023 92 | 76 -0.0725 0.145 -0.499 0.618 -0.357 0.212 93 | 77 0.0396 0.194 0.204 0.838 -0.341 0.420 94 | 78 0.0470 0.115 0.407 0.684 -0.179 0.273 95 | 79 -0.0257 0.030 -0.864 0.388 -0.084 0.033 96 | 80 -0.1081 0.030 -3.586 0.000 -0.167 -0.049 97 | 81 0.0531 0.024 2.220 0.026 0.006 0.100 98 | 82 0.0631 0.138 0.459 0.647 -0.207 0.333 99 | 83 -0.0392 0.191 -0.206 0.837 -0.414 0.335 100 | 84 -0.0521 0.117 -0.447 0.655 -0.281 0.176 101 | 85 0.0317 0.030 1.068 0.285 -0.026 0.090 102 | 86 0.0898 0.030 2.987 0.003 0.031 0.149 103 | 87 -0.0198 0.008 -2.337 0.019 -0.036 -0.003 104 | 88 0.0432 0.009 4.717 0.000 0.025 0.061 105 | 89 -0.0745 0.011 -6.581 0.000 -0.097 -0.052 106 | 90 0.0091 0.013 0.725 0.468 -0.015 0.034 107 | 91 0.0502 0.017 3.010 0.003 0.017 0.083 108 | 92 -0.0309 0.017 -1.848 0.065 -0.064 0.002 109 | 93 0.0534 0.024 2.203 0.028 0.006 0.101 110 | 94 -0.0078 0.006 -1.229 0.219 -0.020 0.005 111 | 95 0.0086 0.007 1.257 0.209 -0.005 0.022 112 | 96 -0.0559 0.022 -2.596 0.009 -0.098 -0.014 113 | 97 0.0051 0.005 1.050 0.294 -0.004 0.015 114 | 98 -0.0039 0.005 -0.797 0.425 
-0.014 0.006 115 | 99 -0.0299 0.019 -1.544 0.123 -0.068 0.008 116 | 100 0.0567 0.052 1.093 0.274 -0.045 0.158 117 | 101 -0.0009 0.004 -0.227 0.820 -0.009 0.007 118 | 102 0.0133 0.045 0.295 0.768 -0.075 0.102 119 | 103 -0.0014 0.008 -0.186 0.852 -0.017 0.014 120 | 104 0.0077 0.008 0.984 0.325 -0.008 0.023 121 | 105 -0.0132 0.014 -0.966 0.334 -0.040 0.014 122 | 106 0.0024 0.008 0.295 0.768 -0.014 0.018 123 | 107 -4.479e-05 0.007 -0.007 0.995 -0.013 0.013 124 | 108 -0.0255 0.012 -2.148 0.032 -0.049 -0.002 125 | 109 -0.0018 0.006 -0.305 0.760 -0.013 0.010 126 | 110 0.0011 0.006 0.181 0.856 -0.010 0.013 127 | 111 0.0565 0.009 6.418 0.000 0.039 0.074 128 | 112 -0.0801 0.010 -8.336 0.000 -0.099 -0.061 129 | 113 0.0087 0.004 2.179 0.029 0.001 0.017 130 | 114 0.0310 0.009 3.303 0.001 0.013 0.049 131 | 115 0.0052 0.012 0.430 0.667 -0.018 0.029 132 | 116 -0.0048 0.012 -0.403 0.687 -0.028 0.019 133 | 117 -0.0164 0.012 -1.408 0.159 -0.039 0.006 134 | 118 0.0028 0.003 0.817 0.414 -0.004 0.010 135 | 119 -0.0082 0.005 -1.515 0.130 -0.019 0.002 136 | 120 0.0012 0.011 0.103 0.918 -0.021 0.023 137 | 121 0.0062 0.008 0.762 0.446 -0.010 0.022 138 | 122 -0.0072 0.008 -0.884 0.377 -0.023 0.009 139 | 123 -0.0122 0.005 -2.397 0.017 -0.022 -0.002 140 | 124 0.0089 0.004 2.274 0.023 0.001 0.017 141 | 125 -0.0054 0.004 -1.434 0.152 -0.013 0.002 142 | 126 0.0031 0.005 0.580 0.562 -0.007 0.013 143 | 127 0.0049 0.006 0.869 0.385 -0.006 0.016 144 | 128 -0.0037 0.006 -0.643 0.521 -0.015 0.007 145 | 129 0.0202 0.010 2.011 0.044 0.001 0.040 146 | 130 0.0025 0.008 0.309 0.757 -0.013 0.018 147 | 131 -9.556e-05 0.024 -0.004 0.997 -0.046 0.046 148 | 132 -0.0014 0.020 -0.070 0.944 -0.041 0.038 149 | 133 -0.0175 0.014 -1.214 0.225 -0.046 0.011 150 | 134 0.0171 0.014 1.184 0.236 -0.011 0.045 151 | 135 -0.0462 0.043 -1.081 0.280 -0.130 0.038 152 | 136 0.0530 0.014 3.897 0.000 0.026 0.080 153 | 137 -0.0440 0.011 -3.866 0.000 -0.066 -0.022 154 | 138 0.0066 0.038 0.176 0.860 -0.067 0.080 155 | 139 0.0302 0.014 
2.127 0.033 0.002 0.058 156 | 140 -0.0539 0.014 -3.780 0.000 -0.082 -0.026 157 | 141 0.0447 0.012 3.662 0.000 0.021 0.069 158 | 142 0.0077 0.010 0.777 0.437 -0.012 0.027 159 | 143 -0.0623 0.018 -3.485 0.000 -0.097 -0.027 160 | 144 0.0264 0.018 1.459 0.144 -0.009 0.062 161 | 145 0.0224 0.017 1.283 0.199 -0.012 0.057 162 | 146 -0.0246 0.017 -1.408 0.159 -0.059 0.010 163 | 147 0.1025 0.010 10.516 0.000 0.083 0.122 164 | 148 -0.1200 0.013 -9.310 0.000 -0.145 -0.095 165 | 149 0.0596 0.008 7.165 0.000 0.043 0.076 166 | 150 0.0203 0.013 1.621 0.105 -0.004 0.045 167 | 151 -0.0944 0.038 -2.486 0.013 -0.169 -0.020 168 | 152 0.0822 0.038 2.164 0.030 0.008 0.157 169 | 153 0.0180 0.004 4.316 0.000 0.010 0.026 170 | 154 0.0178 0.009 2.055 0.040 0.001 0.035 171 | 155 0.0092 0.009 1.070 0.285 -0.008 0.026 172 | 156 -0.0346 0.007 -4.979 0.000 -0.048 -0.021 173 | 157 -0.0306 0.018 -1.692 0.091 -0.066 0.005 174 | 158 0.0119 0.018 0.659 0.510 -0.023 0.047 175 | 159 0.0012 0.006 0.215 0.830 -0.010 0.012 176 | 160 -0.0008 0.003 -0.229 0.819 -0.007 0.006 177 | 161 0.0076 0.006 1.297 0.195 -0.004 0.019 178 | 162 -0.0181 0.006 -3.005 0.003 -0.030 -0.006 179 | 163 -0.0046 0.017 -0.266 0.790 -0.038 0.029 180 | 164 -0.0053 0.017 -0.307 0.759 -0.039 0.028 181 | ============================================================================== 182 | Omnibus: 8164.798 Durbin-Watson: 1.732 183 | Prob(Omnibus): 0.000 Jarque-Bera (JB): 17124.211 184 | Skew: 1.486 Prob(JB): 0.00 185 | Kurtosis: 4.943 Cond. No. 1.16e+16 186 | ============================================================================== 187 | 188 | Notes: 189 | [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. 190 | [2] The smallest eigenvalue is 1.66e-26. This might indicate that there are 191 | strong multicollinearity problems or that the design matrix is singular. 
-------------------------------------------------------------------------------- /stat/logistic_pca.txt: -------------------------------------------------------------------------------- 1 | Logit Regression Results 2 | ============================================================================== 3 | Dep. Variable: class No. Observations: 32230 4 | Model: Logit Df Residuals: 32064 5 | Method: MLE Df Model: 165 6 | Date: Mon, 18 Jan 2021 Pseudo R-squ.: 0.5371 7 | Time: 11:34:39 Log-Likelihood: -5279.3 8 | converged: False LL-Null: -11406. 9 | Covariance Type: nonrobust LLR p-value: 0.000 10 | ============================================================================== 11 | coef std err z P>|z| [0.025 0.975] 12 | ------------------------------------------------------------------------------ 13 | const -10.0007 4.838 -2.067 0.039 -19.483 -0.519 14 | x1 -1.4828 1.800 -0.824 0.410 -5.010 2.044 15 | x2 -0.3383 7.827 -0.043 0.966 -15.680 15.003 16 | x3 -2.3858 13.622 -0.175 0.861 -29.084 24.312 17 | x4 -0.6432 2.855 -0.225 0.822 -6.238 4.952 18 | x5 -0.2194 3.946 -0.056 0.956 -7.953 7.514 19 | x6 -0.9874 11.538 -0.086 0.932 -23.602 21.627 20 | x7 -0.2048 5.114 -0.040 0.968 -10.229 9.819 21 | x8 0.2324 11.340 0.020 0.984 -21.993 22.458 22 | x9 -0.0558 6.614 -0.008 0.993 -13.019 12.908 23 | x10 2.4238 10.930 0.222 0.825 -18.999 23.846 24 | x11 -0.1997 8.503 -0.023 0.981 -16.865 16.466 25 | x12 0.5964 5.063 0.118 0.906 -9.327 10.519 26 | x13 -0.1600 5.654 -0.028 0.977 -11.242 10.922 27 | x14 -0.0215 4.339 -0.005 0.996 -8.525 8.482 28 | x15 -0.4409 3.340 -0.132 0.895 -6.987 6.105 29 | x16 -0.1501 3.999 -0.038 0.970 -7.988 7.688 30 | x17 -0.9917 6.206 -0.160 0.873 -13.155 11.172 31 | x18 -0.4393 1.776 -0.247 0.805 -3.921 3.042 32 | x19 -0.4566 2.708 -0.169 0.866 -5.764 4.851 33 | x20 -0.6219 2.431 -0.256 0.798 -5.386 4.142 34 | x21 -0.3628 2.461 -0.147 0.883 -5.187 4.461 35 | x22 0.4490 7.769 0.058 0.954 -14.778 15.676 36 | x23 -0.0966 4.176 -0.023 0.982 -8.282 8.089 37 | 
x24 -1.2359 2.538 -0.487 0.626 -6.210 3.738 38 | x25 -0.2556 2.137 -0.120 0.905 -4.444 3.933 39 | x26 0.3730 2.335 0.160 0.873 -4.203 4.949 40 | x27 -1.2864 3.847 -0.334 0.738 -8.826 6.253 41 | x28 0.6908 4.196 0.165 0.869 -7.533 8.914 42 | x29 -1.3673 1.908 -0.716 0.474 -5.108 2.373 43 | x30 0.0538 5.042 0.011 0.991 -9.829 9.936 44 | x31 0.3132 2.925 0.107 0.915 -5.420 6.047 45 | x32 0.2607 5.299 0.049 0.961 -10.126 10.647 46 | x33 -0.6100 6.819 -0.089 0.929 -13.976 12.756 47 | x34 -1.0727 3.572 -0.300 0.764 -8.073 5.927 48 | x35 0.1336 7.489 0.018 0.986 -14.544 14.812 49 | x36 0.0107 1.194 0.009 0.993 -2.330 2.352 50 | x37 -0.2570 6.734 -0.038 0.970 -13.456 12.942 51 | x38 -0.8106 1.881 -0.431 0.667 -4.498 2.877 52 | x39 -0.3469 1.442 -0.241 0.810 -3.172 2.479 53 | x40 0.5702 3.823 0.149 0.881 -6.923 8.064 54 | x41 0.3623 3.201 0.113 0.910 -5.912 6.636 55 | x42 -0.4489 4.001 -0.112 0.911 -8.290 7.392 56 | x43 0.1179 2.097 0.056 0.955 -3.991 4.227 57 | x44 -0.2916 1.648 -0.177 0.860 -3.523 2.939 58 | x45 -0.0366 5.790 -0.006 0.995 -11.384 11.311 59 | x46 0.2308 4.342 0.053 0.958 -8.280 8.741 60 | x47 -0.8910 1.574 -0.566 0.571 -3.976 2.194 61 | x48 -0.8073 3.669 -0.220 0.826 -7.999 6.385 62 | x49 0.1262 2.805 0.045 0.964 -5.371 5.623 63 | x50 0.1790 2.475 0.072 0.942 -4.671 5.029 64 | x51 0.0611 1.380 0.044 0.965 -2.644 2.766 65 | x52 -0.4630 8.030 -0.058 0.954 -16.202 15.276 66 | x53 -0.8216 4.307 -0.191 0.849 -9.264 7.621 67 | x54 0.0828 19.156 0.004 0.997 -37.463 37.628 68 | x55 1.0147 10.127 0.100 0.920 -18.834 20.863 69 | x56 -0.6372 2.847 -0.224 0.823 -6.218 4.943 70 | x57 0.7684 7.779 0.099 0.921 -14.477 16.014 71 | x58 -0.2164 2.163 -0.100 0.920 -4.456 4.023 72 | x59 0.1890 5.549 0.034 0.973 -10.687 11.065 73 | x60 -0.1429 1.100 -0.130 0.897 -2.299 2.013 74 | x61 -0.4646 7.334 -0.063 0.949 -14.839 13.909 75 | x62 0.8159 18.526 0.044 0.965 -35.494 37.126 76 | x63 0.6818 29.080 0.023 0.981 -56.313 57.677 77 | x64 0.6405 2.936 0.218 0.827 -5.114 6.395 78 | 
x65 -0.9740 21.999 -0.044 0.965 -44.092 42.143 79 | x66 -0.0725 6.874 -0.011 0.992 -13.545 13.401 80 | x67 0.1271 9.603 0.013 0.989 -18.694 18.948 81 | x68 0.6477 12.953 0.050 0.960 -24.740 26.035 82 | x69 -0.0920 7.138 -0.013 0.990 -14.081 13.897 83 | x70 0.1717 19.196 0.009 0.993 -37.452 37.795 84 | x71 0.3713 9.791 0.038 0.970 -18.819 19.562 85 | x72 -0.4859 6.195 -0.078 0.937 -12.628 11.656 86 | x73 0.5260 10.469 0.050 0.960 -19.992 21.044 87 | x74 0.5689 13.792 0.041 0.967 -26.462 27.600 88 | x75 0.3460 18.232 0.019 0.985 -35.387 36.080 89 | x76 0.3802 12.889 0.029 0.976 -24.882 25.642 90 | x77 -0.2931 1.180 -0.248 0.804 -2.606 2.020 91 | x78 0.0457 5.647 0.008 0.994 -11.022 11.113 92 | x79 0.2294 62.586 0.004 0.997 -122.437 122.896 93 | x80 0.1180 38.060 0.003 0.998 -74.479 74.715 94 | x81 -0.0811 34.152 -0.002 0.998 -67.017 66.855 95 | x82 -0.2728 48.771 -0.006 0.996 -95.861 95.316 96 | x83 0.2181 56.082 0.004 0.997 -109.701 110.138 97 | x84 -0.7795 17.397 -0.045 0.964 -34.877 33.318 98 | x85 0.2464 28.416 0.009 0.993 -55.448 55.941 99 | x86 0.0897 299.583 0.000 1.000 -587.081 587.261 100 | x87 0.1409 14.133 0.010 0.992 -27.559 27.841 101 | x88 0.1073 53.801 0.002 0.998 -105.341 105.556 102 | x89 -0.1320 34.635 -0.004 0.997 -68.016 67.752 103 | x90 0.0620 17.792 0.003 0.997 -34.810 34.934 104 | x91 0.1632 13.844 0.012 0.991 -26.971 27.298 105 | x92 0.0376 119.726 0.000 1.000 -234.620 234.695 106 | x93 -0.0796 77.997 -0.001 0.999 -152.950 152.791 107 | x94 0.1026 23.271 0.004 0.996 -45.509 45.714 108 | x95 0.1094 113.554 0.001 0.999 -222.452 222.671 109 | x96 -0.1499 71.880 -0.002 0.998 -141.031 140.732 110 | x97 -0.0045 101.327 -4.4e-05 1.000 -198.602 198.593 111 | x98 -0.2104 122.034 -0.002 0.999 -239.393 238.973 112 | x99 0.2051 23.670 0.009 0.993 -46.187 46.598 113 | x100 0.0311 37.420 0.001 0.999 -73.310 73.372 114 | x101 -0.0172 420.691 -4.09e-05 1.000 -824.556 824.521 115 | x102 -0.0066 310.325 -2.13e-05 1.000 -608.233 608.220 116 | x103 -0.1212 
298.008 -0.000 1.000 -584.207 583.965 117 | x104 0.3414 65.588 0.005 0.996 -128.208 128.891 118 | x105 -0.0806 22.653 -0.004 0.997 -44.479 44.318 119 | x106 -0.2431 374.724 -0.001 0.999 -734.689 734.202 120 | x107 -0.0833 64.187 -0.001 0.999 -125.888 125.721 121 | x108 0.0406 110.874 0.000 1.000 -217.268 217.349 122 | x109 0.0008 74.680 1.07e-05 1.000 -146.369 146.371 123 | x110 0.0183 28.353 0.001 0.999 -55.554 55.590 124 | x111 0.1073 179.238 0.001 1.000 -351.192 351.407 125 | x112 -0.3437 145.330 -0.002 0.998 -285.185 284.497 126 | x113 -0.1234 162.614 -0.001 0.999 -318.840 318.593 127 | x114 0.0511 78.187 0.001 0.999 -153.192 153.294 128 | x115 0.0430 325.998 0.000 1.000 -638.900 638.986 129 | x116 0.1099 238.798 0.000 1.000 -467.926 468.146 130 | x117 0.2739 195.244 0.001 0.999 -382.397 382.945 131 | x118 -0.1353 209.342 -0.001 0.999 -410.438 410.167 132 | x119 -0.0246 20.880 -0.001 0.999 -40.949 40.900 133 | x120 0.0027 17.882 0.000 1.000 -35.045 35.051 134 | x121 0.0235 197.072 0.000 1.000 -386.230 386.277 135 | x122 -0.2767 17.857 -0.015 0.988 -35.276 34.723 136 | x123 0.0115 645.309 1.78e-05 1.000 -1264.771 1264.794 137 | x124 0.0698 326.428 0.000 1.000 -639.717 639.857 138 | x125 0.0418 125.909 0.000 1.000 -246.736 246.819 139 | x126 0.0438 949.679 4.61e-05 1.000 -1861.292 1861.380 140 | x127 -0.1550 612.605 -0.000 1.000 -1200.839 1200.529 141 | x128 0.0418 191.533 0.000 1.000 -375.357 375.440 142 | x129 -0.0464 434.811 -0.000 1.000 -852.261 852.168 143 | x130 0.0307 312.982 9.8e-05 1.000 -613.404 613.465 144 | x131 0.0735 65.533 0.001 0.999 -128.369 128.516 145 | x132 -0.0146 151.539 -9.64e-05 1.000 -297.025 296.996 146 | x133 0.0338 111.777 0.000 1.000 -219.046 219.113 147 | x134 -0.0839 78.337 -0.001 0.999 -153.621 153.454 148 | x135 0.0365 148.872 0.000 1.000 -291.747 291.820 149 | x136 -0.0050 33.948 -0.000 1.000 -66.542 66.532 150 | x137 0.0051 12.172 0.000 1.000 -23.852 23.863 151 | x138 -0.0133 131.770 -0.000 1.000 -258.278 258.252 152 | x139 
-0.0053 111.158 -4.78e-05 1.000 -217.870 217.860 153 | x140 -0.0235 56.919 -0.000 1.000 -111.583 111.536 154 | x141 -0.0213 145.224 -0.000 1.000 -284.655 284.612 155 | x142 0.0280 41.677 0.001 0.999 -81.658 81.714 156 | x143 0.0437 104.886 0.000 1.000 -205.529 205.616 157 | x144 0.0178 63.120 0.000 1.000 -123.695 123.731 158 | x145 -0.0018 77.942 -2.34e-05 1.000 -152.766 152.762 159 | x146 0.0081 288.650 2.79e-05 1.000 -565.735 565.751 160 | x147 0.0079 20.841 0.000 1.000 -40.840 40.856 161 | x148 -0.0060 290.741 -2.05e-05 1.000 -569.847 569.835 162 | x149 -0.0070 60.762 -0.000 1.000 -119.098 119.084 163 | x150 0.0023 333.030 6.87e-06 1.000 -652.724 652.729 164 | x151 -0.0011 181.320 -5.9e-06 1.000 -355.381 355.379 165 | x152 0.0010 1.12e+04 8.76e-08 1.000 -2.2e+04 2.2e+04 166 | x153 0.0006 4457.562 1.38e-07 1.000 -8736.661 8736.662 167 | x154 -0.0017 339.655 -5.1e-06 1.000 -665.714 665.711 168 | x155 -0.0011 1058.129 -1.02e-06 1.000 -2073.896 2073.894 169 | x156 -0.0082 92.678 -8.88e-05 1.000 -181.653 181.637 170 | x157 0.0007 919.918 8.12e-07 1.000 -1803.005 1803.007 171 | x158 -0.0006 228.269 -2.56e-06 1.000 -447.399 447.398 172 | x159 6.925e-05 6.86e+04 1.01e-09 1.000 -1.34e+05 1.34e+05 173 | x160 3.982e-05 4.49e+04 8.86e-10 1.000 -8.81e+04 8.81e+04 174 | x161 -6.224e-06 2.73e+06 -2.28e-12 1.000 -5.36e+06 5.36e+06 175 | x162 -9.806e-06 6.68e+05 -1.47e-11 1.000 -1.31e+06 1.31e+06 176 | x163 -1.032e-06 3.44e+05 -3e-12 1.000 -6.74e+05 6.74e+05 177 | x164 -1.157e-06 8.08e+06 -1.43e-13 1.000 -1.58e+07 1.58e+07 178 | x165 -5.681e-07 4.3e+05 -1.32e-12 1.000 -8.44e+05 8.44e+05 179 | x166 -4.089e-14 7.07e+14 -5.79e-29 1.000 -1.38e+15 1.38e+15 180 | x167 1.654e-15 1e+15 1.65e-30 1.000 -1.96e+15 1.96e+15 181 | x168 -4.649e-17 7.97e+14 -5.83e-32 1.000 -1.56e+15 1.56e+15 182 | x169 3.08e-17 5.08e+14 6.06e-32 1.000 -9.97e+14 9.97e+14 183 | x170 9.146e-16 2.43e+14 3.76e-30 1.000 -4.76e+14 4.76e+14 184 | 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Project labelled Elliptic transactions to 2-D with PCA and scatter-plot them.

Writes ../image/llicit2D.png colour-coded by the illicit/licit label.
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA

# Load features (headerless CSV) and class labels, then join on txId.
features = pd.read_csv('../../elliptic_bitcoin_dataset/elliptic_txs_features.csv', header=None)
classes = pd.read_csv('../../elliptic_bitcoin_dataset/elliptic_txs_classes.csv')
feature = [str(i) for i in range(165)]
features.columns = ["txId", "time_step"] + feature
features = pd.merge(features, classes, left_on="txId", right_on="txId", how='left')
features['class'] = features['class'].apply(lambda x: '0' if x == "unknown" else x)
data = features[(features['class'] == '1') | (features['class'] == '2')]
X = data[feature]
Y = data['class']
Y = Y.apply(lambda x: 0 if x == '2' else 1)  # 1 = illicit, 0 = licit

# Reduce to the first two principal components for visualisation.
pca = PCA(n_components=2).fit(X)
pcaf = pca.transform(X)

trans = pd.DataFrame()
trans['x'] = pcaf[:, 0]
trans['y'] = pcaf[:, 1]
# BUG FIX: `trans` has a fresh RangeIndex while Y still carries the filtered
# (non-contiguous) row index inherited from `features`.  Assigning the Series
# directly would align on those mismatched indices, producing NaN/misplaced
# labels.  Assign positionally via the underlying values instead.
trans["illicit"] = Y.to_numpy()

plt.figure(figsize=(16, 10))
sns.scatterplot(
    x="x", y="y",
    hue="illicit",
    palette=sns.color_palette("hls", 2),
    data=trans,
    legend="full",
    alpha=0.3,
)

plt.savefig('../image/llicit2D.png')
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Per-feature time-series panels (mean/variance by class) for the modified data.

For each of the 171 features, draws a 2x4 grid: mean (top row) and variance
(bottom row) per time step, split into licit / illicit / unknown / total.
Only the panels for the newly engineered features (index > 164) are saved.
"""
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Load the engineered dataset (headerless CSV) and join the class labels.
features = pd.read_csv('../../elliptic_bitcoin_dataset/full_data.csv', header=None)
classes = pd.read_csv('../../elliptic_bitcoin_dataset/elliptic_txs_classes.csv')
feature = [str(i) for i in range(171)]
features.columns = ["txId", "time_step"] + feature
features = pd.merge(features, classes, left_on="txId", right_on="txId", how='left')


for i in range(171):
    fig, axes = plt.subplots(2, 4)
    fig.suptitle(f'Feature {i}')

    # Licit transactions (class '1').
    features1 = features[features['class'] == '1']
    group = features1[f'{i}'].groupby(features1['time_step'])
    group.mean().plot(ax=axes[0, 0], figsize=(20, 10), color='green')
    axes[0, 0].set_title('Licit mean')
    group = features1[f'{i}'].groupby(features1['time_step'])
    group.var().plot(ax=axes[1, 0], figsize=(20, 10), color='green')
    axes[1, 0].set_title('Licit variance')

    # Illicit transactions (class '2').
    features2 = features[features['class'] == '2']
    group = features2[f'{i}'].groupby(features2['time_step'])
    group.mean().plot(ax=axes[0, 1], figsize=(20, 10), color='red')
    axes[0, 1].set_title('Illicit mean')
    group = features2[f'{i}'].groupby(features2['time_step'])
    group.var().plot(ax=axes[1, 1], figsize=(20, 10), color='red')
    axes[1, 1].set_title('Illicit variance')

    # Unlabelled transactions.
    featuresu = features[features['class'] == 'unknown']
    group = featuresu[f'{i}'].groupby(featuresu['time_step'])
    group.mean().plot(ax=axes[0, 2], figsize=(20, 10))
    axes[0, 2].set_title('Unknown mean')
    group = featuresu[f'{i}'].groupby(featuresu['time_step'])
    group.var().plot(ax=axes[1, 2], figsize=(20, 10))
    axes[1, 2].set_title('Unknown variance')

    # All transactions combined.
    group = features[f'{i}'].groupby(features['time_step'])
    group.mean().plot(ax=axes[0, 3], figsize=(20, 10), color='black')
    axes[0, 3].set_title('Total mean')
    group = features[f'{i}'].groupby(features['time_step'])
    group.var().plot(ax=axes[1, 3], figsize=(20, 10), color='black')
    axes[1, 3].set_title('Total variance')

    # Only the engineered features (beyond the 165 raw ones) are saved.
    if i > 164:
        plt.savefig(f'../image/Feature{i:03}.png')
    # BUG FIX: close each figure; the original kept all 171 figures open,
    # leaking memory (matplotlib warns once more than 20 figures exist).
    plt.close(fig)
group.var().plot(ax=axes[1, 1], figsize=(20,10), color='red') 32 | axes[1, 1].set_title('Illicit variance') 33 | 34 | 35 | featuresu = features[features['class'] == 'unknown'] 36 | group = featuresu[f'{i}'].groupby(featuresu['time_step']) 37 | group.mean().plot(ax=axes[0, 2], figsize=(20,10)) 38 | axes[0, 2].set_title('Unknown mean') 39 | group = featuresu[f'{i}'].groupby(featuresu['time_step']) 40 | group.var().plot(ax=axes[1, 2], figsize=(20,10)) 41 | axes[1, 2].set_title('Unknown variance') 42 | 43 | 44 | group = features[f'{i}'].groupby(features['time_step']) 45 | group.mean().plot(ax=axes[0, 3], figsize=(20,10), color='black') 46 | axes[0, 3].set_title('Total mean') 47 | group = features[f'{i}'].groupby(features['time_step']) 48 | group.var().plot(ax=axes[1, 3], figsize=(20,10), color='black') 49 | axes[1, 3].set_title('Total variance') 50 | 51 | plt.savefig(f'../image/Feature{i:03}.png') -------------------------------------------------------------------------------- /stat/vif_modified.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import pandas as pd 4 | import numpy as np 5 | from sklearn.linear_model import LinearRegression 6 | import statsmodels.api as sm 7 | from scipy import stats 8 | import matplotlib.pyplot as plt 9 | import seaborn as sns 10 | from statsmodels.stats.outliers_influence import variance_inflation_factor 11 | from statsmodels.tools.tools import add_constant 12 | features = pd.read_csv('../../elliptic_bitcoin_dataset/full_data.csv',header=None) 13 | classes = pd.read_csv('../../elliptic_bitcoin_dataset/elliptic_txs_classes.csv') 14 | feature = [str(i) for i in range(171)] 15 | features.columns = ["txId","time_step"] + feature 16 | features = pd.merge(features,classes,left_on="txId",right_on="txId",how='left') 17 | features['class'] = features['class'].apply(lambda x: '0' if x == "unknown" else x) 18 | data = features[(features['class']=='1') | 
(features['class']=='2')] 19 | X = data[feature] 20 | Y = data['class'] 21 | Y = Y.apply(lambda x: 0 if x == '2' else 1) 22 | X = add_constant(X.dropna()) 23 | Seri = pd.Series([variance_inflation_factor(X.values, i) for i in range(X.shape[1])], index=X.columns) 24 | 25 | fi = open('./vif_modified.txt', 'w') 26 | fi.write(f"{Seri.to_string()}") 27 | fi.close() 28 | plt.tick_params( 29 | axis='x', 30 | which='both', 31 | bottom=False, 32 | top=False, 33 | labelbottom=False) 34 | Seri.plot.bar(logy=True) 35 | plt.savefig("../image/vif_modified.png") 36 | 37 | -------------------------------------------------------------------------------- /stat/vif_modified.txt: -------------------------------------------------------------------------------- 1 | const 0.000000e+00 2 | 0 4.503600e+15 3 | 1 2.732018e+00 4 | 2 3.263728e+00 5 | 3 4.868756e+13 6 | 4 2.251800e+15 7 | 5 8.509929e+01 8 | 6 2.635948e+00 9 | 7 5.324393e+03 10 | 8 2.285745e+02 11 | 9 1.976016e+02 12 | 10 4.283634e+03 13 | 11 2.181967e+01 14 | 12 2.295572e+01 15 | 13 4.414793e+06 16 | 14 3.270851e+00 17 | 15 1.146422e+02 18 | 16 1.984976e+02 19 | 17 2.487183e+02 20 | 18 2.106249e+02 21 | 19 8.373636e+01 22 | 20 8.536475e+01 23 | 21 1.139813e+10 24 | 22 1.515422e+10 25 | 23 9.462517e+09 26 | 24 1.286743e+15 27 | 25 1.380600e+04 28 | 26 1.329379e+04 29 | 27 1.139447e+10 30 | 28 1.515267e+10 31 | 29 9.462765e+09 32 | 30 4.503600e+15 33 | 31 1.381708e+04 34 | 32 1.330383e+04 35 | 33 1.808506e+04 36 | 34 3.884107e+03 37 | 35 1.031519e+02 38 | 36 1.332346e+04 39 | 37 8.484235e+01 40 | 38 8.497261e+01 41 | 39 5.219839e+02 42 | 40 3.729848e+03 43 | 41 2.551161e+02 44 | 42 2.018652e+03 45 | 43 2.529275e+02 46 | 44 2.487591e+02 47 | 45 2.507779e+03 48 | 46 5.148675e+03 49 | 47 2.497947e+02 50 | 48 3.295528e+03 51 | 49 2.602637e+02 52 | 50 2.561149e+02 53 | 51 9.317100e+01 54 | 52 4.456776e+01 55 | 53 2.512126e+01 56 | 54 1.003550e+02 57 | 55 3.132797e+01 58 | 56 3.212488e+01 59 | 57 5.483356e+08 60 | 58 8.055852e+08 61 | 
59 7.653541e+09 62 | 60 2.649176e+14 63 | 61 4.284899e+03 64 | 62 4.196760e+03 65 | 63 5.481720e+08 66 | 64 8.057537e+08 67 | 65 7.651181e+09 68 | 66 2.649176e+14 69 | 67 4.288110e+03 70 | 68 4.198624e+03 71 | 69 3.267217e+01 72 | 70 6.546975e+02 73 | 71 8.043277e+03 74 | 72 1.577600e+05 75 | 73 1.393005e+02 76 | 74 1.394124e+02 77 | 75 4.614884e+02 78 | 76 3.637611e+03 79 | 77 2.179180e+04 80 | 78 1.176367e+04 81 | 79 3.349488e+02 82 | 80 3.377383e+02 83 | 81 5.442139e+02 84 | 82 3.152351e+03 85 | 83 2.149493e+04 86 | 84 1.173446e+04 87 | 85 3.326355e+02 88 | 86 3.352830e+02 89 | 87 3.955408e+01 90 | 88 5.799412e+01 91 | 89 9.736991e+01 92 | 90 1.008442e+02 93 | 91 1.051789e+02 94 | 92 1.059648e+02 95 | 93 3.265044e+02 96 | 94 1.467210e+01 97 | 95 3.036091e+01 98 | 96 2.800323e+02 99 | 97 1.773487e+01 100 | 98 1.812639e+01 101 | 99 2.088530e+02 102 | 100 1.890315e+03 103 | 101 2.248910e+01 104 | 102 1.230733e+03 105 | 103 4.413078e+01 106 | 104 4.440494e+01 107 | 105 8.447656e+01 108 | 106 2.648655e+01 109 | 107 1.710208e+01 110 | 108 6.672923e+01 111 | 109 2.344195e+01 112 | 110 2.315009e+01 113 | 111 3.775287e+01 114 | 112 5.256231e+01 115 | 113 1.448686e+01 116 | 114 4.749934e+01 117 | 115 1.159106e+02 118 | 116 1.165168e+02 119 | 117 7.664211e+01 120 | 118 8.924717e+00 121 | 119 2.446292e+01 122 | 120 7.597567e+01 123 | 121 4.543590e+01 124 | 122 4.543376e+01 125 | 123 4.412965e+01 126 | 124 2.629122e+01 127 | 125 2.380610e+01 128 | 126 4.653240e+01 129 | 127 3.081152e+01 130 | 128 3.071422e+01 131 | 129 1.862950e+01 132 | 130 3.581008e+01 133 | 131 4.081771e+02 134 | 132 2.597422e+02 135 | 133 8.348953e+01 136 | 134 8.414461e+01 137 | 135 1.023392e+03 138 | 136 8.369500e+01 139 | 137 6.551080e+01 140 | 138 8.417779e+02 141 | 139 7.835077e+01 142 | 140 7.916055e+01 143 | 141 8.003040e+00 144 | 142 1.443072e+01 145 | 143 5.292780e+01 146 | 144 4.580615e+01 147 | 145 1.133577e+02 148 | 146 1.129433e+02 149 | 147 3.996121e+01 150 | 148 8.025378e+01 151 | 149 
2.778780e+01 152 | 150 7.640462e+01 153 | 151 5.366918e+02 154 | 152 5.376407e+02 155 | 153 7.348205e+00 156 | 154 2.542570e+01 157 | 155 2.703730e+01 158 | 156 2.109586e+01 159 | 157 1.335916e+02 160 | 158 1.334584e+02 161 | 159 3.907109e+00 162 | 160 1.766131e+01 163 | 161 5.093146e+01 164 | 162 1.849289e+01 165 | 163 1.046127e+02 166 | 164 1.046372e+02 167 | 165 inf 168 | 166 5.203936e+03 169 | 167 4.740631e+14 170 | 168 3.816610e+13 171 | 169 5.270245e+03 172 | 170 2.344076e+02 -------------------------------------------------------------------------------- /stat/vif_raw.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import pandas as pd 4 | import numpy as np 5 | from sklearn.linear_model import LinearRegression 6 | import statsmodels.api as sm 7 | from scipy import stats 8 | import matplotlib.pyplot as plt 9 | import seaborn as sns 10 | from statsmodels.stats.outliers_influence import variance_inflation_factor 11 | from statsmodels.tools.tools import add_constant 12 | features = pd.read_csv('../../elliptic_bitcoin_dataset/elliptic_txs_features.csv',header=None) 13 | classes = pd.read_csv('../../elliptic_bitcoin_dataset/elliptic_txs_classes.csv') 14 | feature = [str(i) for i in range(165)] 15 | features.columns = ["txId","time_step"] + feature 16 | features = pd.merge(features,classes,left_on="txId",right_on="txId",how='left') 17 | features['class'] = features['class'].apply(lambda x: '0' if x == "unknown" else x) 18 | data = features[(features['class']=='1') | (features['class']=='2')] 19 | X = data[feature] 20 | Y = data['class'] 21 | Y = Y.apply(lambda x: 0 if x == '2' else 1) 22 | X = add_constant(X.dropna()) 23 | Seri = pd.Series([variance_inflation_factor(X.values, i) for i in range(X.shape[1])], index=X.columns) 24 | 25 | 26 | fi = open('./vif_raw.txt', 'w') 27 | fi.write(f"{Seri.to_string()}") 28 | fi.close() 29 | plt.tick_params( 30 | axis='x', 31 | 
which='both', 32 | bottom=False, 33 | top=False, 34 | labelbottom=False) 35 | Seri.plot.bar(logy=True) 36 | plt.savefig("../image/vif_raw.png") 37 | -------------------------------------------------------------------------------- /stat/vif_raw.txt: -------------------------------------------------------------------------------- 1 | const 4.321167e+00 2 | 0 1.515528e+02 3 | 1 2.708498e+00 4 | 2 3.240588e+00 5 | 3 8.750366e+01 6 | 4 4.415380e+06 7 | 5 8.428652e+01 8 | 6 2.630342e+00 9 | 7 5.584780e+03 10 | 8 2.295606e+02 11 | 9 2.078734e+02 12 | 10 4.489654e+03 13 | 11 2.161655e+01 14 | 12 2.267576e+01 15 | 13 4.415198e+06 16 | 14 3.270485e+00 17 | 15 1.145579e+02 18 | 16 1.991821e+02 19 | 17 2.486756e+02 20 | 18 2.105129e+02 21 | 19 8.477703e+01 22 | 20 8.640953e+01 23 | 21 1.396021e+10 24 | 22 1.878719e+10 25 | 23 9.483139e+09 26 | 24 inf 27 | 25 1.355234e+04 28 | 26 1.303429e+04 29 | 27 1.395571e+10 30 | 28 1.878480e+10 31 | 29 9.483199e+09 32 | 30 inf 33 | 31 1.356244e+04 34 | 32 1.304392e+04 35 | 33 2.220293e+04 36 | 34 4.989421e+03 37 | 35 1.913740e+02 38 | 36 inf 39 | 37 8.493381e+01 40 | 38 8.506860e+01 41 | 39 5.102916e+02 42 | 40 3.817264e+03 43 | 41 2.623396e+02 44 | 42 2.051798e+03 45 | 43 2.511958e+02 46 | 44 2.475310e+02 47 | 45 2.578204e+03 48 | 46 5.357036e+03 49 | 47 2.532446e+02 50 | 48 3.415254e+03 51 | 49 2.585963e+02 52 | 50 2.549478e+02 53 | 51 9.414077e+01 54 | 52 4.460562e+01 55 | 53 2.504047e+01 56 | 54 1.015092e+02 57 | 55 3.136893e+01 58 | 56 3.216821e+01 59 | 57 5.502455e+08 60 | 58 8.103640e+08 61 | 59 7.709769e+09 62 | 60 inf 63 | 61 4.341470e+03 64 | 62 4.252911e+03 65 | 63 5.500922e+08 66 | 64 8.105460e+08 67 | 65 7.711445e+09 68 | 66 inf 69 | 67 4.344960e+03 70 | 68 4.255050e+03 71 | 69 3.284006e+01 72 | 70 6.617582e+02 73 | 71 8.095708e+03 74 | 72 inf 75 | 73 1.405635e+02 76 | 74 1.406648e+02 77 | 75 4.586207e+02 78 | 76 3.989636e+03 79 | 77 2.413703e+04 80 | 78 1.300953e+04 81 | 79 3.382287e+02 82 | 80 3.410041e+02 83 | 81 
5.377838e+02 84 | 82 3.207094e+03 85 | 83 2.191532e+04 86 | 84 1.195370e+04 87 | 85 3.359507e+02 88 | 86 3.385863e+02 89 | 87 4.147110e+01 90 | 88 5.853742e+01 91 | 89 9.788100e+01 92 | 90 1.028861e+02 93 | 91 1.066128e+02 94 | 92 1.073980e+02 95 | 93 3.432430e+02 96 | 94 1.480605e+01 97 | 95 3.020629e+01 98 | 96 2.940972e+02 99 | 97 1.774196e+01 100 | 98 1.812994e+01 101 | 99 2.106323e+02 102 | 100 1.525181e+03 103 | 101 2.236693e+01 104 | 102 1.221146e+03 105 | 103 4.418867e+01 106 | 104 4.446126e+01 107 | 105 9.738633e+01 108 | 106 2.857048e+01 109 | 107 1.711220e+01 110 | 108 7.607623e+01 111 | 109 2.346536e+01 112 | 110 2.317413e+01 113 | 111 3.849147e+01 114 | 112 5.306878e+01 115 | 113 1.448474e+01 116 | 114 4.840727e+01 117 | 115 1.161320e+02 118 | 116 1.167360e+02 119 | 117 9.038749e+01 120 | 118 9.332372e+00 121 | 119 2.477444e+01 122 | 120 8.747488e+01 123 | 121 4.542464e+01 124 | 122 4.542206e+01 125 | 123 4.448754e+01 126 | 124 2.617159e+01 127 | 125 2.369142e+01 128 | 126 4.687450e+01 129 | 127 3.085321e+01 130 | 128 3.075633e+01 131 | 129 1.869426e+01 132 | 130 3.585644e+01 133 | 131 4.089074e+02 134 | 132 2.602966e+02 135 | 133 8.453261e+01 136 | 134 8.518968e+01 137 | 135 1.031476e+03 138 | 136 8.431650e+01 139 | 137 6.679315e+01 140 | 138 8.484040e+02 141 | 139 7.937485e+01 142 | 140 8.018032e+01 143 | 141 8.039644e+00 144 | 142 1.454277e+01 145 | 143 5.368986e+01 146 | 144 4.655835e+01 147 | 145 1.150091e+02 148 | 146 1.145950e+02 149 | 147 4.029100e+01 150 | 148 8.071540e+01 151 | 149 2.798153e+01 152 | 150 7.686676e+01 153 | 151 5.434265e+02 154 | 152 5.443683e+02 155 | 153 7.534526e+00 156 | 154 2.566684e+01 157 | 155 2.734679e+01 158 | 156 2.149780e+01 159 | 157 1.354709e+02 160 | 158 1.353384e+02 161 | 159 3.902579e+00 162 | 160 1.766565e+01 163 | 161 5.103286e+01 164 | 162 1.853167e+01 165 | 163 1.060750e+02 166 | 164 1.060982e+02 --------------------------------------------------------------------------------