├── .idea
├── .gitignore
├── DLpTCR.iml
├── inspectionProfiles
│ ├── Project_Default.xml
│ └── profiles_settings.xml
├── misc.xml
├── modules.xml
└── vcs.xml
├── LICENSE
├── README.md
├── code
├── API.py
├── DLpTCR_server.py
├── Model_Predict_Feature_Extraction.py
├── Train_Test_Onehot_Chem_Feature_Extraction.py
├── Train_Test_PCA_Feature_Extraction.py
├── aaindexValues.py
├── fold
│ ├── CNN_A_fold_onehot.py
│ ├── CNN_A_fold_onehot_chem.py
│ ├── CNN_A_fold_pca.py
│ ├── CNN_B_fold_onehot.py
│ ├── CNN_B_fold_onehot_chem.py
│ ├── CNN_B_fold_pca.py
│ ├── FULL_A_fold_onehot.py
│ ├── FULL_A_fold_onehot_chem.py
│ ├── FULL_A_fold_pca.py
│ ├── FULL_B_fold_onehot.py
│ ├── FULL_B_fold_onehot_chem.py
│ ├── FULL_B_fold_pca.py
│ ├── RESNET_A_fold_onehot.py
│ ├── RESNET_A_fold_onehot_chem.py
│ ├── RESNET_A_fold_pca.py
│ ├── RESNET_B_fold_onehot.py
│ ├── RESNET_B_fold_onehot_chem.py
│ └── RESNET_B_fold_pca.py
└── train
│ ├── CNN_A_ALL_onehot.py
│ ├── CNN_B_ALL_pca20.py
│ ├── FULL_A_ALL_onehot.py
│ ├── FULL_B_ALL_pca18.py
│ ├── RESNET_A_ALL_pca15.py
│ └── RESNET_B_ALL_pca10.py
├── data
├── Example_file.xlsx
├── TCRAB_IEDB.csv
├── TCRA_COVID-19.csv
├── TCRA_test.csv
├── TCRA_train.csv
├── TCRB_COVID-19.csv
├── TCRB_Train.csv
├── TCRB_test.csv
├── TRA-VDJdb_TCR cross-reactivity.rar
└── TRB_VDJdb_TCR cross-reactivity.rar
├── model
├── CNN_A_ALL_onehot.h5
├── CNN_B_ALL_pca20.h5
├── FULL_A_ALL_onehot.h5
├── FULL_B_ALL_pca18.h5
├── RESNET_A_ALL_pca15.h5
└── RESNET_B_ALL_pca10.h5
├── pca
├── Amino_Acids_PCAVal10_dict.txt
├── Amino_Acids_PCAVal11_dict.txt
├── Amino_Acids_PCAVal12_dict.txt
├── Amino_Acids_PCAVal13_dict.txt
├── Amino_Acids_PCAVal14_dict.txt
├── Amino_Acids_PCAVal15_dict.txt
├── Amino_Acids_PCAVal16_dict.txt
├── Amino_Acids_PCAVal17_dict.txt
├── Amino_Acids_PCAVal18_dict.txt
├── Amino_Acids_PCAVal19_dict.txt
├── Amino_Acids_PCAVal20_dict.txt
├── Amino_Acids_PCAVal8_dict.txt
└── Amino_Acids_PCAVal9_dict.txt
└── requirements.txt
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Datasource local storage ignored files
5 | /dataSources/
6 | /dataSources.local.xml
7 | # 基于编辑器的 HTTP 客户端请求
8 | /httpRequests/
9 |
--------------------------------------------------------------------------------
/.idea/DLpTCR.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The DLpTCR code is freely accessible to all interested parties.
2 | It is free for academic, non-profit, and research use, and can be licensed for commercial use.
3 |
4 | To use this software for the development of a commercial product, including but not limited to software, service, or pharmaceuticals, please contact the lead corresponding author.
5 |
6 | Redistribution of the code with or without modification is not permitted without explicit written permission by the lead corresponding author.
7 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | DLpTCR: an ensemble deep learning framework for predicting immunogenic peptide recognized by T cell receptor
2 | ================================================
3 |
4 |
5 |
6 | Overview
7 | --------
8 | Here, we report DLpTCR, a computational framework that integrates three deep-learning models
9 | for predicting the likelihood of the interaction between TCR and peptide presented by
10 | MHC molecules. DLpTCR obtained excellent performance on independent testing dataset,
11 | thereby allowing robust identification of immunogenic T cell epitopes.
12 |
13 | Installation
14 | ------------
15 | Download DLpTCR by
16 | ```sh
17 | git clone https://github.com/JiangBioLab/DLpTCR
18 | ```
19 | This package can be installed in one of the following ways:
20 |
21 | # If needed:
22 | pip install -r requirements.txt
23 | # Or
24 | conda install --yes --file requirements.txt
25 | # Or you can create a new environment
26 | conda create --name dlptcr --file requirements.txt
27 |
28 |
29 | Note the code depends on the `numpy`, `tensorflow` and other packages. So have those
30 | installed first. The build will likely fail if it can't find them. For more information, see:
31 |
32 | + [NumPy](http://www.numpy.org/): Library for efficient matrix math in Python
33 | + [tensorflow](https://tensorflow.google.cn/): An end-to-end open source machine learning platform in Python
34 |
35 |
36 | Contents
37 | --------
38 |
39 | data
40 | --------
41 | We collected experimentally verified TCR-pMHC pairs from the VDJdb, IEDB and
42 | TetTCR-seq dataset for constructing a high-quality benchmark dataset. These peptide-TCR pairs were
43 | split into training, testing and independent testing datasets with regard to their TCR α- and β-chains
44 | so that each peptide-TCR pair only existed in one split, as detailed below:
45 | 1)TCRA_train.csv and TCRB_Train.csv are the training datasets for constructing and training the models.
46 | 2)TCRA_test.csv and TCRB_test.csv are the testing datasets for testing the constructed models.
47 | 3)TCRA_COVID-19.csv and TCRB_COVID-19.csv are independent testing data for evaluating the performance of
48 | ensemble classifiers.
49 | 4)TRA-VDJdb_TCR cross-reactivity.rar and TRB_VDJdb_TCR cross-reactivity.rar are used to assess the
50 | prediction ability of ensemble classifiers for TCR cross-reactivity.
51 | 5)TCRAB_IEDB.csv is used to evaluate the integrated model for predicting the peptide-TCRαβ interaction.
52 |
53 | model
54 | --------
55 | The final base classifiers of DLpTCR are deposited in this folder.
56 | 1) FULL_A_ALL_onehot.h5, CNN_A_ALL_onehot.h5 and RESNET_A_ALL_pca15.h5 are the base classifiers of ensemble
57 | model for predicting the peptide-TCRα interaction.
58 | 2) FULL_B_ALL_pca18.h5, CNN_B_ALL_pca20.h5 and RESNET_B_ALL_pca10.h5 are the base classifiers of ensemble
59 | model for predicting the peptide-TCRβ interaction.
60 |
61 | pca
62 | --------
63 | The folder contains the features generated by full training datasets using PCA encoding method.
64 | We padded each sequence of a pair to the maximum length of 20 and encoded them using Principal
65 | Component Analysis (PCA) encoding. For each amino acid, we selected the top 20 PCs, which explained
66 | over 95% of the total data variation, and generated different vectors using 8-20 PCs to represent
67 | its biochemical signatures, respectively.
68 |
69 |
70 | code
71 | --------
72 | The source code for feature extraction, five-fold cross-validation, model construction and training,
73 | and prediction is deposited in the folder 'code'.
74 | 1) The source code in folder 'fold' is used to select the appropriate features by five-fold cross-validation.
75 | 2) The source code in folder 'train' is used to construct and train the base classifiers.
76 | 3) The source code (XXX_Feature_Extraction.py) is used to implement feature extraction.
77 | 4) The source code (DLpTCR_server.py) is used to predict the peptide-TCR interaction.
78 |
79 |
80 | How to Use
81 | ----------
82 |
83 | #### Running on GPU or CPU
84 |
85 | After you install DLpTCR, TensorFlow will be installed along with DLpTCR.
86 | Refer to Keras documentation to configure TensorFlow to run on GPU/CPU.
87 | Note that, if you want to use GPU, you also need to install CUDA and cuDNN;
88 | refer to their websites for instructions. If you use "conda install tensorflow-gpu" to install TensorFlow.
89 | CPU is only suitable for prediction, not training.
90 |
91 | ### For general users who want to perform immunogenic peptide prediction by our provided model :
92 | cd to the DLpTCR/code folder which contains DLpTCR_server.py, Model_Predict_Feature_Extraction.py.
93 | python
94 | >>> from Model_Predict_Feature_Extraction import *
95 | >>> from DLpTCR_server import *
96 | >>> input_file_path = '../data/Example_file.xlsx'
97 |
98 | Please refer to document 'Example_file.xlsx' for the format of the input file.
99 | Column names are not allowed to change.
100 |
101 | >>> model_select = "AB"
102 |
103 | model:pTCRα model_select = "A"
104 | model:pTCRβ model_select = "B"
105 | model:pTCRαβ model_select = "AB"
106 |
107 | >>> job_dir_name = 'test'
108 | >>> user_dir = './user/' + str(job_dir_name) + '/'
109 |
110 |
111 | The predicted files will be stored in the path "user_dir".
112 |
113 | >>> user_dir_Exists = os.path.exists(user_dir)
114 | >>> if not user_dir_Exists:
115 | os.makedirs(user_dir)
116 |
117 | >>> error_info,TCRA_cdr3,TCRB_cdr3,Epitope = deal_file(input_file_path, user_dir, model_select)
118 | >>> output_file_path = save_outputfile(user_dir, model_select, input_file_path,TCRA_cdr3,TCRB_cdr3,Epitope)
119 | Alternatively, you can run API.py to predict the peptide-TCR interaction.
120 |
121 | python API.py
122 |
123 | ### For advanced users who want to perform training and predicting by using their own data:
124 |
125 | #### For custom training:
126 | CPU is only suitable for prediction not training.
127 | For custom general training using user’s training data:
128 | ```sh
129 | python Train_Test_Onehot_Chem_Feature_Extraction.py
130 | python Train_Test_PCA_Feature_Extraction.py
131 | ```
132 |
133 |
134 | The code in folder DLpTCR/code/fold is then used for 5-fold cross-validation to select the best features:
135 | ```sh
136 | #example
137 | python CNN_A_fold_onehot.py
138 | ```
139 | The code in folder DLpTCR/code/train is then used to train the models on the selected best features:
140 |
141 | ```sh
142 | #example
143 | python CNN_A_ALL_onehot.py
144 | ```
145 |
146 | ### Citation:
147 |
148 | Please cite the following paper for using DLpTCR:
149 |
150 | DLpTCR: an ensemble deep learning framework for predicting immunogenic peptide recognized by T cell receptor
151 |
152 |
--------------------------------------------------------------------------------
/code/API.py:
--------------------------------------------------------------------------------
# Example driver: run the DLpTCR prediction pipeline on an input spreadsheet.
#
# `os` is imported explicitly instead of relying on one of the star-imports
# below to leak it into this namespace.
import os

from Model_Predict_Feature_Extraction import *

from DLpTCR_server import *

# Please refer to document 'Example_file.xlsx' for the format of the input file.
# Column names are not allowed to change.
input_file_path = '../data/Example_file.xlsx'

# Which model to run:
#   model:pTCRα   model_select = "A"
#   model:pTCRβ   model_select = "B"
#   model:pTCRαβ  model_select = "AB"
model_select = "AB"

job_dir_name = 'test'
user_dir = './user/' + str(job_dir_name) + '/'

# The predicted files will be stored in the path "user_dir".
user_dir_Exists = os.path.exists(user_dir)
if not user_dir_Exists:
    # exist_ok guards against the directory appearing between the check and
    # the creation.
    os.makedirs(user_dir, exist_ok=True)

# NOTE(review): error_info is returned by deal_file but never inspected here;
# confirm whether a failure is signalled through it before the output is saved.
error_info, TCRA_cdr3, TCRB_cdr3, Epitope = deal_file(input_file_path, user_dir, model_select)
output_file_path = save_outputfile(user_dir, model_select, input_file_path, TCRA_cdr3, TCRB_cdr3, Epitope)
34 |
--------------------------------------------------------------------------------
/code/Train_Test_Onehot_Chem_Feature_Extraction.py:
--------------------------------------------------------------------------------
1 | import pandas as pd #映入模块
2 | from collections import Counter
3 | import scipy.io as sio
4 | from Bio import SeqIO
5 | #import cv2
6 | import os
7 | import scipy.misc
8 | import numpy as np
9 | import csv
10 | import os
11 | from numpy import *
12 | import pandas as pd
13 |
14 |
15 |
16 |
def AA_ONE_HOT(AA):
    """One-hot encode an amino-acid sequence.

    Each residue becomes a row of 20 floats. The column order is
    'ACDEFGHIKLMNPQRSTVWY'; the padding symbol 'X' maps to an all-zero row.

    Parameters
    ----------
    AA : sequence of single-letter residue codes (e.g. a str).

    Returns
    -------
    numpy.ndarray of shape (len(AA), 20) and dtype float.

    Raises
    ------
    KeyError
        If a residue is not one of the 20 listed letters or 'X'.
    """
    alphabet = 'ACDEFGHIKLMNPQRSTVWY'

    # Build the lookup table: residue -> 20-element indicator list.
    lookup = {
        residue: [1 if column == position else 0 for column in range(20)]
        for position, residue in enumerate(alphabet)
    }
    lookup['X'] = [0] * 20

    encoded = np.zeros((len(AA), 20), dtype=float)
    for row, residue in enumerate(AA):
        encoded[row] = lookup[residue]

    return encoded
52 |
53 |
def AA_CHEM(AA):
    """Encode an amino-acid sequence with a fixed table of 20 numeric descriptors.

    Every residue is replaced by its 20-value row from the table below (the
    source does not name the individual descriptors); the padding symbol 'X'
    maps to an all-zero row.

    Parameters
    ----------
    AA : sequence of single-letter residue codes (e.g. a str).

    Returns
    -------
    numpy.ndarray of shape (len(AA), 20) and dtype float.

    Raises
    ------
    KeyError
        If a residue has no row in the table.
    """
    chem_table = {
        'A': [-0.591, -1.302, -0.733, 1.57, -0.146, 0.62, -0.5, 15, 2.35, 9.87, 6.11, -1.338, -3.102, 0.52, 1.18, 4.349, -0.368, 0.36, 0.67, -9.475],
        'C': [-1.343, 0.465, -0.862, -1.02, -0.255, 0.29, -1, 47, 1.71, 10.78, 5.02, -1.511, 0.957, 1.12, 1.89, 4.686, 4.53, 0.70, 0.38, -12.210],
        'D': [1.05, 0.302, -3.656, -0.259, -3.242, -0.9, 3, 59, 1.88, 9.6, 2.98, -0.204, 0.424, 0.77, 0.05, 4.765, 2.06, -1.09, -1.2, -12.144],
        'E': [1.357, -1.453, 1.477, 0.113, -0.837, -0.74, 3, 73, 2.19, 9.67, 3.08, -0.365, 2.009, 0.76, 0.11, 4.295, 1.77, -0.83, -0.76, -13.815],
        'F': [-1.006, -0.59, 1.891, -0.397, 0.412, 1.19, -2.5, 91, 2.58, 9.24, 5.91, 2.877, -0.466, 0.86, 1.96, 4.663, 1.06, 1.01, 2.3, -20.504],
        'G': [-0.384, 1.652, 1.33, 1.045, 2.064, 0.48, 0, 1, 2.34, 9.6, 6.06, -1.097, -2.746, 0.56, 0.49, 3.972, -0.525, -0.82, 0, -7.592],
        'H': [0.336, -0.417, -1.673, -1.474, -0.078, -0.4, -0.5, 82, 1.78, 8.97, 7.64, 2.269, -0.223, 0.94, 0.31, 4.630, 0, 0.16, 0.64, -17.550],
        'I': [-1.239, -0.547, 2.131, 0.393, 0.816, 1.38, -1.8, 57, 2.32, 9.76, 6.04, -1.741, 0.424, 0.65, 1.45, 4.224, 0.791, 2.17, 1.9, -15.608],
        'K': [1.831, -0.561, 0.533, -0.277, 1.648, -1.5, 3, 73, 2.2, 8.9, 9.47, -1.822, 3.95, 0.81, 0.06, 4.358, 0, -0.56, -0.57, -12.366],
        'L': [-1.019, -0.987, -1.505, 1.266, -0.912, 1.06, -1.8, 57, 2.36, 9.6, 6.04, -1.741, 0.424, 0.58, 3.23, 4.385, 1.07, 1.18, 1.9, -15.728],
        'M': [-0.663, -1.524, 2.219, -1.005, 1.212, 0.64, -1.3, 75, 2.28, 9.21, 5.74, -1.741, 2.484, 1.25, 2.67, 4.513, 0.656, 1.21, 2.4, -15.704],
        'N': [0.945, 0.828, 1.299, -0.169, 0.933, -0.78, 0.2, 58, 2.18, 9.09, 10.76, -0.204, 0.424, 0.79, 0.23, 4.755, 0, -0.9, -0.6, -12.480],
        'P': [0.189, 2.081, -1.628, 0.421, -1.392, 0.12, 0, 42, 1.99, 10.6, 6.3, 1.979, -2.404, 0.61, 0.76, 4.471, -2.24, -0.06, 1.2, -11.893],
        'Q': [0.931, -0.179, -3.005, -0.503, -1.853, -0.85, 0.2, 72, 2.17, 9.13, 5.65, -0.365, 2.009, 0.86, 0.72, 4.373, 0.731, -1.05, -0.22, -13.689],
        'R': [1.538, -0.055, 1.502, 0.44, 2.897, -2.53, 3, 101, 2.18, 9.09, 10.76, 1.169, 3.06, 0.6, 0.20, 4.396, -1.03, -0.52, -2.1, -16.225],
        'S': [-0.228, 1.399, -4.76, 0.67, -2.647, -0.18, 0.3, 31, 2.21, 9.15, 5.68, -1.511, 0.957, 0.64, 0.97, 4.498, -0.524, -0.6, 0.01, -10.518],
        'T': [-0.032, 0.326, 2.213, 0.908, 1.313, -0.05, -0.4, 45, 2.15, 9.12, 5.6, -1.641, -1.339, 0.56, 0.84, 4.346, 0, -1.20, 0.52, -12.369],
        'V': [-1.337, -0.279, -0.544, 1.242, -1.262, 1.08, -1.5, 43, 2.29, 9.74, 6.02, -1.641, -1.339, 0.54, 1.08, 4.184, 0.401, 1.21, 1.5, -13.867],
        'W': [-0.595, 0.009, 0.672, -2.128, -0.184, 0.81, -3.4, 130, 2.38, 9.39, 5.88, 5.913, -1, 1.82, 0.77, 4.702, 1.60, 1.31, 2.6, -26.166],
        'Y': [0.26, 0.83, 3.097, -0.838, 1.512, 0.26, -2.3, 107, 2.2, 9.11, 5.63, 2.714, -0.672, 0.98, 0.39, 4.604, 4.91, 1.05, 1.6, -20.232],
        'X': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    }

    # Look every residue up first (raising KeyError on an unknown one), then
    # stack the rows; the reshape keeps the (0, 20) shape for an empty input.
    rows = [chem_table[residue] for residue in AA]
    return np.array(rows, dtype=float).reshape(len(AA), 20)
88 |
89 |
90 |
def _encode_pair(cdr3_seq, epitope_seq, total_len=29):
    """Encode one CDR3/epitope pair as a (2 * total_len, 20) feature matrix.

    The two sequences are concatenated and left-padded with 'X' up to
    ``total_len`` residues. The first ``total_len`` rows hold the one-hot
    encoding and the remaining rows the AA_CHEM encoding of that same string.

    Raises
    ------
    ValueError
        If the concatenated pair is longer than ``total_len``. Such a pair
        cannot fit the fixed-size feature array (previously this surfaced as
        an opaque NumPy broadcasting error).
    """
    joined = cdr3_seq + epitope_seq
    if len(joined) > total_len:
        raise ValueError(
            'CDR3 + epitope is {} residues long, more than the supported {}: {!r}'.format(
                len(joined), total_len, joined))
    padded = 'X' * (total_len - len(joined)) + joined
    # Stack one-hot rows on top of chemical-descriptor rows.
    return np.vstack((AA_ONE_HOT(padded), AA_CHEM(padded)))


def _build_arrays(csv_file_path, out_prefix):
    """Read one CSV and save its feature and label arrays next to ``out_prefix``.

    The CSV must provide the columns ``CDR3``, ``Epitope`` and ``Class_label``.
    Writes ``<out_prefix>_feature_array.npy`` (shape (n, 58, 20)) and
    ``<out_prefix>_label_array.npy`` (shape (n, 2)), and returns both arrays.
    """
    table = pd.read_csv(csv_file_path)
    label = table.Class_label
    cdr3 = table.CDR3
    epitope = table.Epitope

    feature_array = np.zeros([len(cdr3), 58, 20])
    for i, (cdr3_1, epitope_1) in enumerate(zip(cdr3, epitope)):
        feature_array[i] = _encode_pair(cdr3_1, epitope_1)
    np.save(out_prefix + '_feature_array', feature_array)

    # Labels equal to 1 become [1, 0]; every other label becomes [0, 1].
    label_array = np.zeros([len(label), 2])
    is_one = np.asarray(label == 1)
    label_array[is_one, 0] = 1
    label_array[~is_one, 1] = 1
    np.save(out_prefix + '_label_array', label_array)

    return feature_array, label_array


# (input CSV, output prefix) for every split. The TCRB training file ships as
# 'TCRB_Train.csv' (capital T), so that exact spelling is used to keep the
# script working on case-sensitive filesystems; the output names are unchanged.
for csv_file_path, _out_prefix in (
        ('../data/TCRA_test.csv', '../data/TCRA_test'),
        ('../data/TCRA_train.csv', '../data/TCRA_train'),
        ('../data/TCRB_test.csv', '../data/TCRB_test'),
        ('../data/TCRB_Train.csv', '../data/TCRB_train'),
):
    feature_array, label_array = _build_arrays(csv_file_path, _out_prefix)
--------------------------------------------------------------------------------
/code/Train_Test_PCA_Feature_Extraction.py:
--------------------------------------------------------------------------------
1 | import pandas
2 | import numpy as np
3 | import os
4 |
5 | from sklearn.utils import shuffle
6 | from sklearn.metrics import accuracy_score, matthews_corrcoef, confusion_matrix
7 | from sklearn.metrics import f1_score,roc_auc_score,recall_score,precision_score
8 |
9 | import tensorflow as tf
10 | from tensorflow.keras import layers
11 | from tensorflow.keras import models
12 | from tensorflow.keras.utils import to_categorical
13 | from tensorflow.keras.optimizers import Adam
14 | from tensorflow.keras.regularizers import l2
15 | from tensorflow.keras.callbacks import LearningRateScheduler, ReduceLROnPlateau, ModelCheckpoint
16 |
17 | from aaindexValues import aaindex1PCAValues
18 |
19 |
def pca_code(seqs:list, row=30, n_features=16):
    """Encode sequences with per-residue PCA vectors plus a padding flag.

    Each sequence becomes a (row, n_features + 1) matrix. For the first
    min(len(seq), row) positions the leading n_features columns hold the
    residue's vector from ``aaindex1PCAValues(n_features)`` and the last
    column stays 0. For the remaining (padding) positions only the last
    column is set, to 1.

    Parameters
    ----------
    seqs : list of amino-acid sequences.
    row : number of positions per encoded sequence; longer sequences are truncated.
    n_features : number of principal components per residue.

    Returns
    -------
    numpy.ndarray built from the list of per-sequence matrices.
    """
    pc_lookup = aaindex1PCAValues(n_features)
    width = n_features + 1
    encoded = []
    for seq in seqs:
        grid = np.zeros(shape=(row, width))
        used = min(len(seq), row)
        for pos in range(used):
            grid[pos, :-1] = pc_lookup[seq[pos]]
        # Mark every position beyond the real residues as padding.
        grid[used:, -1] = 1
        encoded.append(grid)
    return np.array(encoded)
38 |
def read_seqs(file, model=1):
    """Read CDR3 sequences, epitope sequences and labels from a CSV file.

    The CSV must provide the columns ``CDR3``, ``Epitope`` and ``Class_label``.

    Parameters
    ----------
    file : path (or file-like object) accepted by ``pandas.read_csv``.
    model : 1 to trim each CDR3 to ``cdr3[2:-1]`` (drops the first two and the
        last residue), 2 to keep each CDR3 unchanged.

    Returns
    -------
    (cdr3_seqs, epit_seqs, labels) : two lists of strings and the
        ``Class_label`` column as a pandas Series.

    Raises
    ------
    ValueError
        If ``model`` is neither 1 nor 2. Previously such a value silently
        produced an empty CDR3 list that no longer matched the epitopes.
    """
    if model not in (1, 2):
        raise ValueError('model must be 1 or 2, got {!r}'.format(model))

    data = pandas.read_csv(file)
    labels = data.Class_label
    # Iterate the columns positionally so the result does not depend on the
    # frame's index labels.
    epit_seqs = list(data.Epitope)
    if model == 1:
        cdr3_seqs = [seq[2:-1] for seq in data.CDR3]
    else:
        cdr3_seqs = list(data.CDR3)

    return cdr3_seqs, epit_seqs, labels
53 |
def load_data(col=20, row=9, m=1):
    """Load and PCA-encode the training, testing and independent datasets.

    The input paths are taken from the module-level names ``trainFile``,
    ``testFile`` and ``indepFile`` at call time.

    Parameters
    ----------
    col : number of PCA features per residue (each encoded row has col + 1 columns).
    row : number of positions per encoded sequence.
    m : mode forwarded to ``read_seqs``.

    Returns
    -------
    ((x_train, y_train), (x_test, y_test), (x_indt, y_indt)) where every x has
    shape (n, row, col + 1, 2) and every y is the two-class categorical
    encoding of the labels.
    """
    def _encode_split(path):
        cdr3_seqs, epit_seqs, labels = read_seqs(path, m)
        # Two channels with the same spatial shape: CDR3 and epitope.
        x = np.ndarray(shape=(len(cdr3_seqs), row, col+1, 2))
        x[:,:,:,0] = pca_code(cdr3_seqs, row, col)  # first channel: CDR3
        x[:,:,:,1] = pca_code(epit_seqs, row, col)  # second channel: epitope
        y = to_categorical(np.array(labels), 2)
        return x, y

    train_pair = _encode_split(trainFile)
    test_pair = _encode_split(testFile)
    indt_pair = _encode_split(indepFile)

    return train_pair, test_pair, indt_pair
82 |
83 |
84 |
# Generate the PCA-encoded train/test arrays for both chains and for every
# number of principal components from 8 to 20.
m = 2
row = 20

# (chain tag used in the output names, train CSV, test CSV, independent CSV).
# The TCRB training file ships as 'TCRB_Train.csv' (capital T), so that exact
# spelling is used to keep the script working on case-sensitive filesystems.
for _chain, _train_csv, _test_csv, _indep_csv in (
        ('TCRB', '../data/TCRB_Train.csv', '../data/TCRB_test.csv', '../data/TCRB_COVID-19.csv'),
        ('TCRA', '../data/TCRA_train.csv', '../data/TCRA_test.csv', '../data/TCRA_COVID-19.csv'),
):
    # load_data reads these three module-level names, so they must be
    # (re)bound before each call.
    trainFile = _train_csv
    testFile = _test_csv
    indepFile = _indep_csv

    for col in range(8, 21):  # col = number of features kept after PCA
        (x_train, y_train), (x_test, y_test), (x_indt, y_indt) = load_data(col=col, row=row, m=m)

        np.save('../data/train_{}_PCA{}_feature_array'.format(_chain, col), x_train)
        np.save('../data/train_{}_PCA{}_label_array'.format(_chain, col), y_train)

        np.save('../data/test_{}_PCA{}_feature_array'.format(_chain, col), x_test)
        np.save('../data/test_{}_PCA{}_label_array'.format(_chain, col), y_test)
        # The independent-set arrays (x_indt, y_indt) are computed by
        # load_data but are not written to disk by this script.
130 |
131 |
132 |
133 |
134 |
--------------------------------------------------------------------------------
/code/aaindexValues.py:
--------------------------------------------------------------------------------
1 |
2 | import numpy as np
3 | from sklearn.decomposition import PCA
4 | from sklearn.preprocessing import StandardScaler
5 | AminoAcids = 'ARNDCQEGHILKMFPSTWYV'
def aaindex1Values(path='./aaindex1'):
    """Parse the numeric value blocks of an AAindex1-style flat file.

    An entry's values start after a line beginning with 'I' and end at the
    line beginning with '//'. All whitespace-separated tokens in between are
    collected into one row. Entries that contain 'NA' on any value line are
    dropped entirely.

    Parameters
    ----------
    path : location of the flat file (defaults to './aaindex1').

    Returns
    -------
    numpy.ndarray with one row per retained entry.

    Raises
    ------
    ValueError
        If a value token is not a valid number.
    """
    AAValues = []
    in_values = False   # True while between an 'I' line and its '//' line
    keep_entry = False  # False once the current entry is known to contain 'NA'
    current = []
    with open(path) as fr:
        for line in fr:
            if line.startswith('I'):
                in_values = True
                keep_entry = True
                current = []
                continue
            if line.startswith('//'):
                in_values = False
                if keep_entry:
                    AAValues.append(current)
                    # A later '//' without its own 'I' line must not append
                    # the same row a second time.
                    keep_entry = False
                continue
            if in_values:
                stripped = line.strip()
                if 'NA' in stripped:
                    keep_entry = False
                    continue
                # float() instead of eval(): the tokens come from a data file
                # and must never be executed as code.
                current.extend(float(token) for token in stripped.split())
    return np.array(AAValues)
33 |
def aaindex1PCAValues(n_features=15):
    """Load the residue -> PCA-vector dictionary for ``n_features`` components.

    The dictionary is read from 'Amino_Acids_PCAVal<n_features>_dict.txt'.
    The current working directory is tried first (the original behaviour).
    If the file is not there, the sibling 'pca' folder of this module's
    directory is tried, since that is where the repository ships these files.

    Returns
    -------
    The object obtained by evaluating the file's contents (a dict mapping
    residue letters to lists of floats for the files written by this module).

    Raises
    ------
    FileNotFoundError
        If the file exists in neither location.
    """
    import os  # local import: the module header does not import os

    name = 'Amino_Acids_PCAVal{}_dict.txt'.format(n_features)
    file = './' + name
    if not os.path.exists(file):
        module_file = globals().get('__file__')
        if module_file:
            fallback = os.path.join(
                os.path.dirname(os.path.abspath(module_file)), os.pardir, 'pca', name)
            if os.path.exists(fallback):
                file = fallback
    with open(file, 'r') as fr:
        # NOTE(review): eval() executes whatever the file contains. Only load
        # dictionary files from a trusted source; consider ast.literal_eval
        # once it is confirmed that the files contain plain literals only.
        aadic = eval(fr.read())
    return aadic
39 |
if __name__ == '__main__':
    # Build the 19-component PCA dictionary: one row per amino acid (in
    # AminoAcids order) after transposing the parsed index table.
    property_matrix = aaindex1Values().T

    # Standardize each column before the projection.
    standardized = StandardScaler().fit_transform(property_matrix)
    components = PCA(n_components=19).fit_transform(standardized)

    residue_to_pcs = {AminoAcids[k]: list(components[k]) for k in range(20)}

    with open('./Amino_Acids_PCAVal19_dict.txt','w') as out:
        out.write(str(residue_to_pcs))
52 |
53 |
54 |
--------------------------------------------------------------------------------
/code/fold/CNN_A_fold_onehot.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | import tensorflow as tf
4 |
5 | try:
6 | import tensorflow.python.keras as keras
7 | except:
8 | import tensorflow.keras as keras
9 |
10 | from tensorflow.python.keras import layers
11 | from tensorflow.python.keras import backend as K
12 |
13 | from sklearn.model_selection import KFold
14 |
15 | from sklearn import metrics
16 | from sklearn.metrics import accuracy_score,matthews_corrcoef,classification_report,confusion_matrix,precision_score,recall_score
17 | from sklearn.metrics import f1_score,roc_auc_score, auc
18 |
19 | from keras import regularizers
20 |
21 | import os
22 | import scipy.io as sio
23 | from sklearn.model_selection import train_test_split
24 | from sklearn.preprocessing import LabelEncoder
25 | import numpy as np
26 | from tensorflow.python.keras.callbacks import ReduceLROnPlateau
27 | from keras.utils import plot_model
28 |
29 |
30 |
31 |
32 | import matplotlib.pyplot as plt
33 |
34 | from tensorflow.python.keras.models import load_model
35 |
36 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # 只显示error和warining信息 3 只显示error信息
37 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" # 这一行注释掉就是使用cpu,不注释就是使用gpu
38 |
39 |
40 |
41 |
42 | #测试集
def CNN_onehot(Dropout1=0, Epochs=20, Batch_size=64):
    """Evaluate a small 2-D CNN on the TCRA one-hot features with 5-fold cross-validation.

    Loads ``../../data/TCRA_train_feature_array.npy`` and the matching label
    array, keeps rows 0-28 of every sample (the one-hot part, per the original
    comment), trains a fresh network on each fold, pools the out-of-fold
    predictions and prints the pooled confusion matrix together with
    accuracy, precision, recall, F1 and MCC.

    The network ends in a single sigmoid unit and is trained with binary
    cross-entropy, so the labels (column 1 of the label array) are assumed
    to be 0/1 -- TODO confirm against the feature-extraction script.

    Args:
        Dropout1: Dropout rate applied just before the output unit.
        Epochs: Number of training epochs per fold.
        Batch_size: Mini-batch size used by ``fit``.

    Returns:
        None. All results are printed.
    """
    features = np.load("../../data/TCRA_train_feature_array.npy")
    labels = np.load("../../data/TCRA_train_label_array.npy")

    input_shape = (29, 20, 1)
    X = features[:, 0:29, :].reshape((len(features),) + input_shape).astype(np.float32)
    Y = labels[:, 1]

    # KFold already shuffles (seeded) and fit() reshuffles every epoch, so the
    # extra unseeded np.random.shuffle calls of the index arrays were dropped.
    splitter = KFold(n_splits=5, shuffle=True, random_state=0)

    true_folds, pred_folds = [], []
    for fold_no, (train_idx, test_idx) in enumerate(splitter.split(X), start=1):
        model = _build_cnn(input_shape, Dropout1)
        model.compile(optimizer="Adam", loss="binary_crossentropy", metrics=['accuracy'])
        model.fit(X[train_idx], Y[train_idx], epochs=Epochs, batch_size=Batch_size, verbose=0)

        # Sequential.predict_classes() no longer exists in current Keras; for a
        # sigmoid output it was equivalent to thresholding predict() at 0.5.
        fold_pred = (model.predict(X[test_idx]) > 0.5).astype("int32").ravel()
        true_folds.append(Y[test_idx])
        pred_folds.append(fold_pred)
        print(fold_no)

        # Release the graph/session state of this fold before building the next model.
        del model
        tf.keras.backend.clear_session()

    Y_test_all = np.concatenate(true_folds)
    Y_pred_all = np.concatenate(pred_folds)

    # sklearn layout (rows = true class, columns = predicted class) with
    # labels=[0, 1] is [[TN, FP], [FN, TP]]. Fixing the labels keeps the matrix
    # 2x2 even if one class is never seen. The printed numbers are in the same
    # positions as before; only the previously swapped variable names changed.
    TN, FP, FN, TP = confusion_matrix(Y_test_all, Y_pred_all, labels=[0, 1]).ravel()

    print('\n\n总混淆矩阵')
    print(TN, FP)
    print(FN, TP)

    accuracy = accuracy_score(Y_test_all, Y_pred_all)
    precision = precision_score(Y_test_all, Y_pred_all)
    recall = recall_score(Y_test_all, Y_pred_all)
    f1 = f1_score(Y_test_all, Y_pred_all)
    MCC = matthews_corrcoef(Y_test_all, Y_pred_all)

    print('\n准确率ACC:', accuracy,
          '\n精确率precision:', precision,
          '\n召回率recall:', recall,
          '\nF1:', f1,
          '\nMCC:', MCC
          )


def _build_cnn(input_shape, dropout_rate):
    """Build the untrained CNN: two Conv2D + AveragePooling2D stages, a 512-256-128-64 dense stack, dropout, sigmoid output."""
    return tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(64, (5, 5), padding='same', input_shape=input_shape, activation='relu'),
        tf.keras.layers.AveragePooling2D(2, 2),
        tf.keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'),
        tf.keras.layers.AveragePooling2D(2, 2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dropout(dropout_rate),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])
202 |
203 |
204 | # Acc = (TP + TN)/(TP+FN+FP+TN)
205 | # print('ACC: {}'.format(Acc))
206 |
207 |
208 |
209 |
210 |
211 |
# Script entry: run the 5-fold evaluation with dropout 0.3, 50 epochs, batch size 128.
CNN_onehot(Dropout1=0.3, Epochs=50, Batch_size=128)
213 |
214 |
215 |
216 |
217 |
--------------------------------------------------------------------------------
/code/fold/CNN_A_fold_onehot_chem.py:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | import tensorflow as tf
6 |
7 | try:
8 | import tensorflow.python.keras as keras
9 | except:
10 | import tensorflow.keras as keras
11 |
12 | from tensorflow.python.keras import layers
13 | from tensorflow.python.keras import backend as K
14 |
15 | from sklearn.model_selection import KFold
16 |
17 | from sklearn import metrics
18 | from sklearn.metrics import accuracy_score,matthews_corrcoef,classification_report,confusion_matrix,precision_score,recall_score
19 | from sklearn.metrics import f1_score,roc_auc_score, auc
20 |
21 | from keras import regularizers
22 |
23 | import os
24 | import scipy.io as sio
25 | from sklearn.model_selection import train_test_split
26 | from sklearn.preprocessing import LabelEncoder
27 | import numpy as np
28 | from tensorflow.python.keras.callbacks import ReduceLROnPlateau
29 | from keras.utils import plot_model
30 |
31 |
32 |
33 |
34 | import matplotlib.pyplot as plt
35 |
36 | from tensorflow.python.keras.models import load_model
37 |
# TensorFlow native-log threshold: "2" suppresses INFO and WARNING output; "3" would suppress ERROR output as well.
# CUDA device mask: only GPU 0 is visible. Drop the entry to expose every GPU, or use "-1" to hide all GPUs (CPU only).
# NOTE(review): both are assigned after TensorFlow has been imported, so messages logged during import are not filtered.
os.environ.update({
    'TF_CPP_MIN_LOG_LEVEL': '2',
    'CUDA_VISIBLE_DEVICES': '0',
})
40 |
41 |
42 |
43 |
44 |
45 | #测试集
def CNN_onehot_chem(Dropout1=0, Epochs=20, Batch_size=64):
    """Evaluate a small 2-D CNN on the full TCRA feature array with 5-fold cross-validation.

    Loads ``../../data/TCRA_train_feature_array.npy`` and the matching label
    array, reshapes every sample to ``(29, 20, 2)``, trains a fresh network on
    each fold, pools the out-of-fold predictions and prints the pooled
    confusion matrix together with accuracy, precision, recall, F1 and MCC.

    The network ends in a single sigmoid unit and is trained with binary
    cross-entropy, so the labels (column 1 of the label array) are assumed
    to be 0/1 -- TODO confirm against the feature-extraction script.

    Args:
        Dropout1: Dropout rate applied just before the output unit.
        Epochs: Number of training epochs per fold.
        Batch_size: Mini-batch size used by ``fit``.

    Returns:
        None. All results are printed.
    """
    features = np.load("../../data/TCRA_train_feature_array.npy")
    labels = np.load("../../data/TCRA_train_label_array.npy")

    input_shape = (29, 20, 2)
    # NOTE(review): this is a plain row-major reshape (unchanged from the
    # original). If the stored array is (N, 58, 20), it interleaves values
    # rather than stacking the two 29x20 maps as channels -- confirm the layout.
    X = features.reshape((len(features),) + input_shape).astype(np.float32)
    Y = labels[:, 1]

    # KFold already shuffles (seeded) and fit() reshuffles every epoch, so the
    # extra unseeded np.random.shuffle calls of the index arrays were dropped.
    splitter = KFold(n_splits=5, shuffle=True, random_state=0)

    true_folds, pred_folds = [], []
    for fold_no, (train_idx, test_idx) in enumerate(splitter.split(X), start=1):
        model = _build_cnn(input_shape, Dropout1)
        model.compile(optimizer="Adam", loss="binary_crossentropy", metrics=['accuracy'])
        model.fit(X[train_idx], Y[train_idx], epochs=Epochs, batch_size=Batch_size, verbose=0)

        # Sequential.predict_classes() no longer exists in current Keras; for a
        # sigmoid output it was equivalent to thresholding predict() at 0.5.
        fold_pred = (model.predict(X[test_idx]) > 0.5).astype("int32").ravel()
        true_folds.append(Y[test_idx])
        pred_folds.append(fold_pred)
        print(fold_no)

        # Release the graph/session state of this fold before building the next model.
        del model
        tf.keras.backend.clear_session()

    Y_test_all = np.concatenate(true_folds)
    Y_pred_all = np.concatenate(pred_folds)

    # sklearn layout (rows = true class, columns = predicted class) with
    # labels=[0, 1] is [[TN, FP], [FN, TP]]. Fixing the labels keeps the matrix
    # 2x2 even if one class is never seen. The printed numbers are in the same
    # positions as before; only the previously swapped variable names changed.
    TN, FP, FN, TP = confusion_matrix(Y_test_all, Y_pred_all, labels=[0, 1]).ravel()

    print('\n\n总混淆矩阵')
    print(TN, FP)
    print(FN, TP)

    accuracy = accuracy_score(Y_test_all, Y_pred_all)
    precision = precision_score(Y_test_all, Y_pred_all)
    recall = recall_score(Y_test_all, Y_pred_all)
    f1 = f1_score(Y_test_all, Y_pred_all)
    MCC = matthews_corrcoef(Y_test_all, Y_pred_all)

    print('\n准确率ACC:', accuracy,
          '\n精确率precision:', precision,
          '\n召回率recall:', recall,
          '\nF1:', f1,
          '\nMCC:', MCC
          )


def _build_cnn(input_shape, dropout_rate):
    """Build the untrained CNN: two Conv2D + AveragePooling2D stages, a 512-256-128-64 dense stack, dropout, sigmoid output."""
    return tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(64, (5, 5), padding='same', input_shape=input_shape, activation='relu'),
        tf.keras.layers.AveragePooling2D(2, 2),
        tf.keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'),
        tf.keras.layers.AveragePooling2D(2, 2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dropout(dropout_rate),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])
205 |
206 |
207 | # Acc = (TP + TN)/(TP+FN+FP+TN)
208 | # print('ACC: {}'.format(Acc))
209 |
210 |
211 |
212 | # In[3]:
213 |
214 |
# Script entry: run the 5-fold evaluation with dropout 0.3, 50 epochs, batch size 128.
CNN_onehot_chem(Dropout1=0.3, Epochs=50, Batch_size=128)
216 |
217 |
218 |
219 |
220 |
221 |
--------------------------------------------------------------------------------
/code/fold/CNN_A_fold_pca.py:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | import tensorflow as tf
6 |
7 | try:
8 | import tensorflow.python.keras as keras
9 | except:
10 | import tensorflow.keras as keras
11 |
12 | from tensorflow.python.keras import layers
13 | from tensorflow.python.keras import backend as K
14 |
15 | from sklearn.model_selection import KFold
16 |
17 | from sklearn import metrics
18 | from sklearn.metrics import accuracy_score,matthews_corrcoef,classification_report,confusion_matrix,precision_score,recall_score
19 | from sklearn.metrics import f1_score,roc_auc_score, auc
20 |
21 | from keras import regularizers
22 |
23 | import os
24 | import scipy.io as sio
25 | from sklearn.model_selection import train_test_split
26 | from sklearn.preprocessing import LabelEncoder
27 | import numpy as np
28 | from tensorflow.python.keras.callbacks import ReduceLROnPlateau
29 | from keras.utils import plot_model
30 |
31 |
32 |
33 |
34 | import matplotlib.pyplot as plt
35 |
36 | from tensorflow.python.keras.models import load_model
37 |
# TensorFlow native-log threshold: "2" suppresses INFO and WARNING output; "3" would suppress ERROR output as well.
# CUDA device mask: only GPU 0 is visible. Drop the entry to expose every GPU, or use "-1" to hide all GPUs (CPU only).
# NOTE(review): both are assigned after TensorFlow has been imported, so messages logged during import are not filtered.
os.environ.update({
    'TF_CPP_MIN_LOG_LEVEL': '2',
    'CUDA_VISIBLE_DEVICES': '0',
})
40 |
41 |
42 |
43 |
44 |
45 | #测试集
def CNN_pca(Dropout1=0, Epochs=20, Batch_size=64, PCA_num=18):
    """Evaluate a small 2-D CNN on the TCRA PCA features with 5-fold cross-validation.

    Loads ``../../data/train_TCRA_PCA{PCA_num}_feature_array.npy`` and the
    matching label array, reshapes every sample to ``(20, PCA_num + 1, 2)``,
    trains a fresh network on each fold, pools the out-of-fold predictions and
    prints the pooled confusion matrix together with accuracy, precision,
    recall, F1 and MCC.

    The network ends in a single sigmoid unit and is trained with binary
    cross-entropy, so the labels (column 1 of the label array) are assumed
    to be 0/1 -- TODO confirm against the feature-extraction script.

    Args:
        Dropout1: Dropout rate applied just before the output unit.
        Epochs: Number of training epochs per fold.
        Batch_size: Mini-batch size used by ``fit``.
        PCA_num: Selects which PCA feature file is loaded and sets the second
            input dimension to ``PCA_num + 1``.

    Returns:
        None. All results are printed.
    """
    features = np.load("../../data/train_TCRA_PCA{}_feature_array.npy".format(PCA_num))
    labels = np.load("../../data/train_TCRA_PCA{}_label_array.npy".format(PCA_num))
    print('\n\n\nPCA_NUM: {}'.format(PCA_num))
    print('Feature.shape: {}'.format(features.shape))

    input_shape = (20, PCA_num + 1, 2)
    X = features.reshape((len(features),) + input_shape).astype(np.float32)
    Y = labels[:, 1]

    # KFold already shuffles (seeded) and fit() reshuffles every epoch, so the
    # extra unseeded np.random.shuffle calls of the index arrays were dropped.
    splitter = KFold(n_splits=5, shuffle=True, random_state=0)

    true_folds, pred_folds = [], []
    for train_idx, test_idx in splitter.split(X):
        model = _build_cnn(input_shape, Dropout1)
        model.compile(optimizer="Adam", loss="binary_crossentropy", metrics=['accuracy'])
        model.fit(X[train_idx], Y[train_idx], epochs=Epochs, batch_size=Batch_size, verbose=0)

        # Sequential.predict_classes() no longer exists in current Keras; for a
        # sigmoid output it was equivalent to thresholding predict() at 0.5.
        fold_pred = (model.predict(X[test_idx]) > 0.5).astype("int32").ravel()
        true_folds.append(Y[test_idx])
        pred_folds.append(fold_pred)

        # This function is called once per PCA size by the driver loop, so
        # releasing each fold's graph/session state matters for memory use.
        del model
        tf.keras.backend.clear_session()

    Y_test_all = np.concatenate(true_folds)
    Y_pred_all = np.concatenate(pred_folds)

    # sklearn layout (rows = true class, columns = predicted class) with
    # labels=[0, 1] is [[TN, FP], [FN, TP]]. Fixing the labels keeps the matrix
    # 2x2 even if one class is never seen. The printed numbers are in the same
    # positions as before; only the previously swapped variable names changed.
    TN, FP, FN, TP = confusion_matrix(Y_test_all, Y_pred_all, labels=[0, 1]).ravel()

    print('总混淆矩阵')
    print(TN, FP)
    print(FN, TP)

    accuracy = accuracy_score(Y_test_all, Y_pred_all)
    precision = precision_score(Y_test_all, Y_pred_all)
    recall = recall_score(Y_test_all, Y_pred_all)
    f1 = f1_score(Y_test_all, Y_pred_all)
    MCC = matthews_corrcoef(Y_test_all, Y_pred_all)

    print('\n准确率ACC:', accuracy,
          '\n精确率precision:', precision,
          '\n召回率recall:', recall,
          '\nF1:', f1,
          '\nMCC:', MCC
          )


def _build_cnn(input_shape, dropout_rate):
    """Build the untrained CNN: two Conv2D + AveragePooling2D stages, a 512-256-128-64 dense stack, dropout, sigmoid output."""
    return tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(64, (5, 5), padding='same', input_shape=input_shape, activation='relu'),
        tf.keras.layers.AveragePooling2D(2, 2),
        tf.keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'),
        tf.keras.layers.AveragePooling2D(2, 2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dropout(dropout_rate),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])
203 |
204 |
205 |
206 |
207 |
# Script entry: repeat the 5-fold evaluation for every PCA size from 8 to 20 inclusive.
for pca_size in range(8, 21):
    CNN_pca(Dropout1=0.3, Epochs=50, Batch_size=128, PCA_num=pca_size)
211 |
212 |
213 |
214 |
215 |
216 |
217 |
--------------------------------------------------------------------------------
/code/fold/CNN_B_fold_onehot.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | import tensorflow as tf
4 |
5 | try:
6 | import tensorflow.python.keras as keras
7 | except:
8 | import tensorflow.keras as keras
9 |
10 | from tensorflow.python.keras import layers
11 | from tensorflow.python.keras import backend as K
12 |
13 | from sklearn.model_selection import KFold
14 |
15 | from sklearn import metrics
16 | from sklearn.metrics import accuracy_score,matthews_corrcoef,classification_report,confusion_matrix,precision_score,recall_score
17 | from sklearn.metrics import f1_score,roc_auc_score, auc
18 |
19 | from keras import regularizers
20 |
21 | import os
22 | import scipy.io as sio
23 | from sklearn.model_selection import train_test_split
24 | from sklearn.preprocessing import LabelEncoder
25 | import numpy as np
26 | from tensorflow.python.keras.callbacks import ReduceLROnPlateau
27 | from keras.utils import plot_model
28 |
29 |
30 |
31 |
32 | import matplotlib.pyplot as plt
33 |
34 | from tensorflow.python.keras.models import load_model
35 |
# TensorFlow native-log threshold: "2" suppresses INFO and WARNING output; "3" would suppress ERROR output as well.
# CUDA device mask: only GPU 0 is visible. Drop the entry to expose every GPU, or use "-1" to hide all GPUs (CPU only).
# NOTE(review): both are assigned after TensorFlow has been imported, so messages logged during import are not filtered.
os.environ.update({
    'TF_CPP_MIN_LOG_LEVEL': '2',
    'CUDA_VISIBLE_DEVICES': '0',
})
38 |
39 |
40 |
41 | #测试集
def CNN_onehot(Dropout1=0, Epochs=20, Batch_size=64):
    """Evaluate a small 2-D CNN on the TCRB one-hot features with 5-fold cross-validation.

    Loads ``../../data/TCRB_train_feature_array.npy`` and the matching label
    array, keeps rows 0-28 of every sample (the one-hot part, per the original
    comment), trains a fresh network on each fold, pools the out-of-fold
    predictions and prints the pooled confusion matrix together with
    accuracy, precision, recall, F1 and MCC.

    The network ends in a single sigmoid unit and is trained with binary
    cross-entropy, so the labels (column 1 of the label array) are assumed
    to be 0/1 -- TODO confirm against the feature-extraction script.

    Args:
        Dropout1: Dropout rate applied just before the output unit.
        Epochs: Number of training epochs per fold.
        Batch_size: Mini-batch size used by ``fit``.

    Returns:
        None. All results are printed.
    """
    features = np.load("../../data/TCRB_train_feature_array.npy")
    labels = np.load("../../data/TCRB_train_label_array.npy")

    input_shape = (29, 20, 1)
    X = features[:, 0:29, :].reshape((len(features),) + input_shape).astype(np.float32)
    Y = labels[:, 1]

    # KFold already shuffles (seeded) and fit() reshuffles every epoch, so the
    # extra unseeded np.random.shuffle calls of the index arrays were dropped.
    splitter = KFold(n_splits=5, shuffle=True, random_state=0)

    true_folds, pred_folds = [], []
    for fold_no, (train_idx, test_idx) in enumerate(splitter.split(X), start=1):
        model = _build_cnn(input_shape, Dropout1)
        model.compile(optimizer="Adam", loss="binary_crossentropy", metrics=['accuracy'])
        model.fit(X[train_idx], Y[train_idx], epochs=Epochs, batch_size=Batch_size, verbose=0)

        # Sequential.predict_classes() no longer exists in current Keras; for a
        # sigmoid output it was equivalent to thresholding predict() at 0.5.
        fold_pred = (model.predict(X[test_idx]) > 0.5).astype("int32").ravel()
        true_folds.append(Y[test_idx])
        pred_folds.append(fold_pred)
        print(fold_no)

        # Release the graph/session state of this fold before building the next model.
        del model
        tf.keras.backend.clear_session()

    Y_test_all = np.concatenate(true_folds)
    Y_pred_all = np.concatenate(pred_folds)

    # sklearn layout (rows = true class, columns = predicted class) with
    # labels=[0, 1] is [[TN, FP], [FN, TP]]. Fixing the labels keeps the matrix
    # 2x2 even if one class is never seen. The printed numbers are in the same
    # positions as before; only the previously swapped variable names changed.
    TN, FP, FN, TP = confusion_matrix(Y_test_all, Y_pred_all, labels=[0, 1]).ravel()

    print('\n\n总混淆矩阵')
    print(TN, FP)
    print(FN, TP)

    accuracy = accuracy_score(Y_test_all, Y_pred_all)
    precision = precision_score(Y_test_all, Y_pred_all)
    recall = recall_score(Y_test_all, Y_pred_all)
    f1 = f1_score(Y_test_all, Y_pred_all)
    MCC = matthews_corrcoef(Y_test_all, Y_pred_all)

    print('\n准确率ACC:', accuracy,
          '\n精确率precision:', precision,
          '\n召回率recall:', recall,
          '\nF1:', f1,
          '\nMCC:', MCC
          )


def _build_cnn(input_shape, dropout_rate):
    """Build the untrained CNN: two Conv2D + AveragePooling2D stages, a 512-256-128-64 dense stack, dropout, sigmoid output."""
    return tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(64, (5, 5), padding='same', input_shape=input_shape, activation='relu'),
        tf.keras.layers.AveragePooling2D(2, 2),
        tf.keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'),
        tf.keras.layers.AveragePooling2D(2, 2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dropout(dropout_rate),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])
200 |
201 |
202 |
203 |
# Script entry: run the 5-fold evaluation with dropout 0.3, 50 epochs, batch size 128.
CNN_onehot(Dropout1=0.3, Epochs=50, Batch_size=128)
205 |
206 |
--------------------------------------------------------------------------------
/code/fold/CNN_B_fold_onehot_chem.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | import tensorflow as tf
4 |
5 | try:
6 | import tensorflow.python.keras as keras
7 | except:
8 | import tensorflow.keras as keras
9 |
10 | from tensorflow.python.keras import layers
11 | from tensorflow.python.keras import backend as K
12 |
13 | from sklearn.model_selection import KFold
14 |
15 | from sklearn import metrics
16 | from sklearn.metrics import accuracy_score,matthews_corrcoef,classification_report,confusion_matrix,precision_score,recall_score
17 | from sklearn.metrics import f1_score,roc_auc_score, auc
18 |
19 | from keras import regularizers
20 |
21 | import os
22 | import scipy.io as sio
23 | from sklearn.model_selection import train_test_split
24 | from sklearn.preprocessing import LabelEncoder
25 | import numpy as np
26 | from tensorflow.python.keras.callbacks import ReduceLROnPlateau
27 | from keras.utils import plot_model
28 |
29 |
30 |
31 |
32 | import matplotlib.pyplot as plt
33 |
34 | from tensorflow.python.keras.models import load_model
35 |
# TensorFlow native-log threshold: "2" suppresses INFO and WARNING output; "3" would suppress ERROR output as well.
# CUDA device mask: only GPU 0 is visible. Drop the entry to expose every GPU, or use "-1" to hide all GPUs (CPU only).
# NOTE(review): both are assigned after TensorFlow has been imported, so messages logged during import are not filtered.
os.environ.update({
    'TF_CPP_MIN_LOG_LEVEL': '2',
    'CUDA_VISIBLE_DEVICES': '0',
})
38 |
39 |
40 |
41 |
42 |
43 |
44 | #测试集
def CNN_onehot_chem(Dropout1=0, Epochs=20, Batch_size=64):
    """Evaluate a small 2-D CNN on the full TCRB feature array with 5-fold cross-validation.

    Loads ``../../data/TCRB_train_feature_array.npy`` and the matching label
    array, reshapes every sample to ``(29, 20, 2)``, trains a fresh network on
    each fold, pools the out-of-fold predictions and prints the pooled
    confusion matrix together with accuracy, precision, recall, F1 and MCC.

    The network ends in a single sigmoid unit and is trained with binary
    cross-entropy, so the labels (column 1 of the label array) are assumed
    to be 0/1 -- TODO confirm against the feature-extraction script.

    Args:
        Dropout1: Dropout rate applied just before the output unit.
        Epochs: Number of training epochs per fold.
        Batch_size: Mini-batch size used by ``fit``.

    Returns:
        None. All results are printed.
    """
    features = np.load("../../data/TCRB_train_feature_array.npy")
    labels = np.load("../../data/TCRB_train_label_array.npy")

    input_shape = (29, 20, 2)
    # NOTE(review): this is a plain row-major reshape (unchanged from the
    # original). If the stored array is (N, 58, 20), it interleaves values
    # rather than stacking the two 29x20 maps as channels -- confirm the layout.
    X = features.reshape((len(features),) + input_shape).astype(np.float32)
    Y = labels[:, 1]

    # KFold already shuffles (seeded) and fit() reshuffles every epoch, so the
    # extra unseeded np.random.shuffle calls of the index arrays were dropped.
    splitter = KFold(n_splits=5, shuffle=True, random_state=0)

    true_folds, pred_folds = [], []
    for fold_no, (train_idx, test_idx) in enumerate(splitter.split(X), start=1):
        model = _build_cnn(input_shape, Dropout1)
        model.compile(optimizer="Adam", loss="binary_crossentropy", metrics=['accuracy'])
        model.fit(X[train_idx], Y[train_idx], epochs=Epochs, batch_size=Batch_size, verbose=0)

        # Sequential.predict_classes() no longer exists in current Keras; for a
        # sigmoid output it was equivalent to thresholding predict() at 0.5.
        fold_pred = (model.predict(X[test_idx]) > 0.5).astype("int32").ravel()
        true_folds.append(Y[test_idx])
        pred_folds.append(fold_pred)
        print(fold_no)

        # Release the graph/session state of this fold before building the next model.
        del model
        tf.keras.backend.clear_session()

    Y_test_all = np.concatenate(true_folds)
    Y_pred_all = np.concatenate(pred_folds)

    # sklearn layout (rows = true class, columns = predicted class) with
    # labels=[0, 1] is [[TN, FP], [FN, TP]]. Fixing the labels keeps the matrix
    # 2x2 even if one class is never seen. The printed numbers are in the same
    # positions as before; only the previously swapped variable names changed.
    TN, FP, FN, TP = confusion_matrix(Y_test_all, Y_pred_all, labels=[0, 1]).ravel()

    print('\n\n总混淆矩阵')
    print(TN, FP)
    print(FN, TP)

    accuracy = accuracy_score(Y_test_all, Y_pred_all)
    precision = precision_score(Y_test_all, Y_pred_all)
    recall = recall_score(Y_test_all, Y_pred_all)
    f1 = f1_score(Y_test_all, Y_pred_all)
    MCC = matthews_corrcoef(Y_test_all, Y_pred_all)

    print('\n准确率ACC:', accuracy,
          '\n精确率precision:', precision,
          '\n召回率recall:', recall,
          '\nF1:', f1,
          '\nMCC:', MCC
          )


def _build_cnn(input_shape, dropout_rate):
    """Build the untrained CNN: two Conv2D + AveragePooling2D stages, a 512-256-128-64 dense stack, dropout, sigmoid output."""
    return tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(64, (5, 5), padding='same', input_shape=input_shape, activation='relu'),
        tf.keras.layers.AveragePooling2D(2, 2),
        tf.keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'),
        tf.keras.layers.AveragePooling2D(2, 2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dropout(dropout_rate),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])
203 |
204 |
205 |
206 |
207 |
# Script entry: run the 5-fold evaluation with dropout 0.3, 50 epochs, batch size 128.
CNN_onehot_chem(Dropout1=0.3, Epochs=50, Batch_size=128)
209 |
210 |
--------------------------------------------------------------------------------
/code/fold/CNN_B_fold_pca.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | import tensorflow as tf
4 |
5 | try:
6 | import tensorflow.python.keras as keras
7 | except:
8 | import tensorflow.keras as keras
9 |
10 | from tensorflow.python.keras import layers
11 | from tensorflow.python.keras import backend as K
12 |
13 | from sklearn.model_selection import KFold
14 |
15 | from sklearn import metrics
16 | from sklearn.metrics import accuracy_score,matthews_corrcoef,classification_report,confusion_matrix,precision_score,recall_score
17 | from sklearn.metrics import f1_score,roc_auc_score, auc
18 |
19 | from keras import regularizers
20 |
21 | import os
22 | import scipy.io as sio
23 | from sklearn.model_selection import train_test_split
24 | from sklearn.preprocessing import LabelEncoder
25 | import numpy as np
26 | from tensorflow.python.keras.callbacks import ReduceLROnPlateau
27 | from keras.utils import plot_model
28 |
29 |
30 |
31 |
32 | import matplotlib.pyplot as plt
33 |
34 | from tensorflow.python.keras.models import load_model
35 |
# TensorFlow native-log threshold: "2" suppresses INFO and WARNING output; "3" would suppress ERROR output as well.
# CUDA device mask: only GPU 0 is visible. Drop the entry to expose every GPU, or use "-1" to hide all GPUs (CPU only).
# NOTE(review): both are assigned after TensorFlow has been imported, so messages logged during import are not filtered.
os.environ.update({
    'TF_CPP_MIN_LOG_LEVEL': '2',
    'CUDA_VISIBLE_DEVICES': '0',
})
38 |
39 |
40 |
41 | #测试集
def CNN_pca(Dropout1 = 0, Epochs = 20, Batch_size = 64, PCA_num = 18 ):
    """Evaluate a two-block CNN with 5-fold cross-validation on PCA-encoded TCRB data.

    Loads ``../../data/train_TCRB_PCA{PCA_num}_feature_array.npy`` and the
    matching label array, trains a freshly initialised network on every fold,
    and prints the pooled confusion matrix together with accuracy, precision,
    recall, F1 and MCC computed over all held-out predictions.

    Args:
        Dropout1: Drop rate of the Dropout layer placed before the output unit.
        Epochs: Number of training epochs per fold.
        Batch_size: Mini-batch size passed to ``model.fit``.
        PCA_num: PCA dimensionality; selects the input files and fixes the
            per-sample shape ``(20, PCA_num + 1, 2)``.

    Returns:
        None. All results are written to stdout.
    """
    features = np.load("../../data/train_TCRB_PCA{}_feature_array.npy".format(PCA_num))
    labels = np.load("../../data/train_TCRB_PCA{}_label_array.npy".format(PCA_num))
    print('\n\n\nPCA_NUM: {}'.format(PCA_num))
    print('Feature.shape: {}'.format(features.shape))

    # Flatten every sample so KFold indices select whole samples; the network
    # input shape is restored per fold below.
    X = features.reshape(len(features), -1)
    # Column 1 is used as the target. Presumably a 0/1 label, given the
    # sigmoid + binary cross-entropy head -- TODO confirm against the
    # feature-extraction script.
    Y = labels[:, 1]

    sample_shape = (20, PCA_num + 1, 2)
    kf = KFold(n_splits=5, shuffle=True, random_state=0)
    held_out_truth = []
    held_out_pred = []

    for train_index, test_index in kf.split(X):
        # Extra in-fold shuffle kept from the original script. It is unseeded,
        # so the sample order inside a fold differs between runs even though
        # the fold membership itself is fixed by random_state=0.
        np.random.shuffle(train_index)
        np.random.shuffle(test_index)

        X_train = X[train_index].reshape((len(train_index),) + sample_shape).astype(np.float32)
        X_test = X[test_index].reshape((len(test_index),) + sample_shape).astype(np.float32)
        Y_train, Y_test = Y[train_index], Y[test_index]

        model = tf.keras.models.Sequential([
            tf.keras.layers.Conv2D(64, (5, 5), padding='same',
                                   input_shape=sample_shape, activation='relu'),
            tf.keras.layers.AveragePooling2D(2, 2),
            tf.keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'),
            tf.keras.layers.AveragePooling2D(2, 2),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(512, activation='relu'),
            tf.keras.layers.Dense(256, activation='relu'),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(64, activation='relu'),
            # Dropout rate is a fraction in [0, 1) of inputs to drop.
            tf.keras.layers.Dropout(Dropout1),
            tf.keras.layers.Dense(1, activation='sigmoid'),
        ])
        model.compile(optimizer="Adam",
                      loss="binary_crossentropy",
                      metrics=['accuracy'])
        model.fit(X_train, Y_train, epochs=Epochs, batch_size=Batch_size, verbose=0)

        # Sequential.predict_classes() was removed in TensorFlow 2.6. For a
        # single sigmoid output it was defined as thresholding at 0.5, which is
        # reproduced here explicitly.
        probabilities = model.predict(X_test, verbose=0)
        Y_pred = (probabilities > 0.5).astype("int32").ravel()

        held_out_truth.append(Y_test)
        held_out_pred.append(Y_pred)

        # Dropping the Python reference alone does not release Keras' global
        # state; clear it so memory does not grow across folds and repeated calls.
        del model
        tf.keras.backend.clear_session()

    Y_test_all = np.concatenate(held_out_truth, axis=0)
    Y_pred_all = np.concatenate(held_out_pred, axis=0)

    # Pooled confusion matrix in scikit-learn's layout: rows are true labels,
    # columns are predicted labels, labels sorted ascending. The original code
    # summed these same cells per fold under the names TP/FN/FP/TN, which only
    # matches this layout if the first label is treated as the positive class;
    # the printed layout is unchanged.
    total_cm = confusion_matrix(Y_test_all, Y_pred_all)
    print('总混淆矩阵')
    print(total_cm[0, 0], total_cm[0, 1])
    print(total_cm[1, 0], total_cm[1, 1])

    accuracy = accuracy_score(Y_test_all, Y_pred_all)
    precision = precision_score(Y_test_all, Y_pred_all)
    recall = recall_score(Y_test_all, Y_pred_all)
    f1 = f1_score(Y_test_all, Y_pred_all)
    MCC = matthews_corrcoef(Y_test_all, Y_pred_all)

    print('\n准确率ACC:',accuracy,
          '\n精确率precision:',precision,
          '\n召回率recall:',recall,
          '\nF1:',f1,
          '\nMCC:',MCC
          )
199 |
200 |
201 |
202 |
# Run the cross-validation once for every PCA dimensionality from 8 to 20
# inclusive, using dropout 0.3, 50 epochs and a batch size of 128.
for i in range(8, 21):
    CNN_pca(Dropout1=0.3, Epochs=50, Batch_size=128, PCA_num=i)
206 |
207 |
208 |
209 |
210 |
211 |
212 |
--------------------------------------------------------------------------------
/code/fold/FULL_A_fold_onehot.py:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | import tensorflow as tf
6 |
7 | try:
8 | import tensorflow.python.keras as keras
9 | except:
10 | import tensorflow.keras as keras
11 |
12 | from tensorflow.python.keras import layers
13 | from tensorflow.python.keras import backend as K
14 |
15 | from sklearn.model_selection import KFold
16 |
17 | from sklearn import metrics
18 | from sklearn.metrics import accuracy_score,matthews_corrcoef,classification_report,confusion_matrix,precision_score,recall_score
19 | from sklearn.metrics import f1_score,roc_auc_score, auc
20 |
21 | from keras import regularizers
22 |
23 | import os
24 | import scipy.io as sio
25 | from sklearn.model_selection import train_test_split
26 | from sklearn.preprocessing import LabelEncoder
27 | import numpy as np
28 | from tensorflow.python.keras.callbacks import ReduceLROnPlateau
29 | from keras.utils import plot_model
30 |
31 |
32 |
33 |
34 | import matplotlib.pyplot as plt
35 |
36 | from tensorflow.python.keras.models import load_model
37 |
# Process-wide TensorFlow / CUDA settings.
#   TF_CPP_MIN_LOG_LEVEL = '2' raises the native TensorFlow log threshold
#     (original note: "show only error and warning messages; 3 shows errors only").
#   CUDA_VISIBLE_DEVICES = "0" exposes only CUDA device 0 to this process
#     (original note: "comment this line out to use the CPU, keep it to use the GPU").
# NOTE(review): TensorFlow is already imported above this point, so the
# log-level setting may not affect messages emitted during import -- confirm.
os.environ.update({
    'TF_CPP_MIN_LOG_LEVEL': '2',
    "CUDA_VISIBLE_DEVICES": "0",
})
40 |
41 |
42 |
43 |
44 |
45 |
46 | #测试集
def FULL_onehot(Dropout1=0, Epochs=20, Batch_size=64):
    """Evaluate a fully connected network with 5-fold CV on one-hot TCRA features.

    Loads ``../../data/TCRA_train_feature_array.npy`` and the matching label
    array, keeps rows 0..28 of every sample (the one-hot part, per the original
    comment), trains a freshly initialised network on every fold, and prints
    the pooled confusion matrix together with accuracy, precision, recall, F1
    and MCC computed over all held-out predictions.

    Args:
        Dropout1: Drop rate of the Dropout layer placed before the output unit.
        Epochs: Number of training epochs per fold.
        Batch_size: Mini-batch size passed to ``model.fit``.

    Returns:
        None. All results are written to stdout.
    """
    features = np.load("../../data/TCRA_train_feature_array.npy")
    labels = np.load("../../data/TCRA_train_label_array.npy")

    # Keep only the first 29 rows of each sample (one-hot features), then
    # flatten so KFold indices select whole samples.
    one_hot = features[:, 0:29, :]
    X = one_hot.reshape(len(one_hot), -1)
    # Column 1 is used as the target. Presumably a 0/1 label, given the
    # sigmoid + binary cross-entropy head -- TODO confirm against the
    # feature-extraction script.
    Y = labels[:, 1]

    sample_shape = (29, 20, 1)
    kf = KFold(n_splits=5, shuffle=True, random_state=0)
    held_out_truth = []
    held_out_pred = []

    for fold_no, (train_index, test_index) in enumerate(kf.split(X), start=1):
        # Extra in-fold shuffle kept from the original script. It is unseeded,
        # so the sample order inside a fold differs between runs even though
        # the fold membership itself is fixed by random_state=0.
        np.random.shuffle(train_index)
        np.random.shuffle(test_index)

        X_train = X[train_index].reshape((len(train_index),) + sample_shape).astype(np.float32)
        X_test = X[test_index].reshape((len(test_index),) + sample_shape).astype(np.float32)
        Y_train, Y_test = Y[train_index], Y[test_index]

        model = tf.keras.models.Sequential([
            tf.keras.layers.Flatten(input_shape=sample_shape),
            tf.keras.layers.Dense(256, activation='relu'),
            tf.keras.layers.Dense(512, activation='relu'),
            tf.keras.layers.Dense(256, activation='relu'),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(64, activation='relu'),
            # Dropout rate is a fraction in [0, 1) of inputs to drop.
            tf.keras.layers.Dropout(Dropout1),
            tf.keras.layers.Dense(1, activation='sigmoid'),
        ])
        model.compile(optimizer="Adam",
                      loss="binary_crossentropy",
                      metrics=['accuracy'])
        model.fit(X_train, Y_train, epochs=Epochs, batch_size=Batch_size, verbose=0)

        # Sequential.predict_classes() was removed in TensorFlow 2.6. For a
        # single sigmoid output it was defined as thresholding at 0.5, which is
        # reproduced here explicitly.
        probabilities = model.predict(X_test, verbose=0)
        Y_pred = (probabilities > 0.5).astype("int32").ravel()

        # Progress indicator: 1-based index of the fold that just finished.
        print(fold_no)

        held_out_truth.append(Y_test)
        held_out_pred.append(Y_pred)

        # Dropping the Python reference alone does not release Keras' global
        # state; clear it so memory does not grow across folds.
        del model
        tf.keras.backend.clear_session()

    Y_test_all = np.concatenate(held_out_truth, axis=0)
    Y_pred_all = np.concatenate(held_out_pred, axis=0)

    # Pooled confusion matrix in scikit-learn's layout: rows are true labels,
    # columns are predicted labels, labels sorted ascending. The original code
    # summed these same cells per fold under the names TP/FN/FP/TN, which only
    # matches this layout if the first label is treated as the positive class;
    # the printed layout is unchanged.
    total_cm = confusion_matrix(Y_test_all, Y_pred_all)
    print('\n\n总混淆矩阵')
    print(total_cm[0, 0], total_cm[0, 1])
    print(total_cm[1, 0], total_cm[1, 1])

    accuracy = accuracy_score(Y_test_all, Y_pred_all)
    precision = precision_score(Y_test_all, Y_pred_all)
    recall = recall_score(Y_test_all, Y_pred_all)
    f1 = f1_score(Y_test_all, Y_pred_all)
    MCC = matthews_corrcoef(Y_test_all, Y_pred_all)

    print('\n准确率ACC:',accuracy,
          '\n精确率precision:',precision,
          '\n召回率recall:',recall,
          '\nF1:',f1,
          '\nMCC:',MCC
          )
191 |
192 |
193 |
194 |
195 |
# Script entry point: cross-validate with dropout 0.3, 50 epochs, batch size 128.
FULL_onehot(Dropout1=0.3, Epochs=50, Batch_size=128)
197 |
198 |
--------------------------------------------------------------------------------
/code/fold/FULL_A_fold_onehot_chem.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | import tensorflow as tf
4 |
5 | try:
6 | import tensorflow.python.keras as keras
7 | except:
8 | import tensorflow.keras as keras
9 |
10 | from tensorflow.python.keras import layers
11 | from tensorflow.python.keras import backend as K
12 |
13 | from sklearn.model_selection import KFold
14 |
15 | from sklearn import metrics
16 | from sklearn.metrics import accuracy_score,matthews_corrcoef,classification_report,confusion_matrix,precision_score,recall_score
17 | from sklearn.metrics import f1_score,roc_auc_score, auc
18 |
19 | from keras import regularizers
20 |
21 | import os
22 | import scipy.io as sio
23 | from sklearn.model_selection import train_test_split
24 | from sklearn.preprocessing import LabelEncoder
25 | import numpy as np
26 | from tensorflow.python.keras.callbacks import ReduceLROnPlateau
27 | from keras.utils import plot_model
28 |
29 |
30 |
31 |
32 | import matplotlib.pyplot as plt
33 |
34 | from tensorflow.python.keras.models import load_model
35 |
# Process-wide TensorFlow / CUDA settings.
#   TF_CPP_MIN_LOG_LEVEL = '2' raises the native TensorFlow log threshold
#     (original note: "show only error and warning messages; 3 shows errors only").
#   CUDA_VISIBLE_DEVICES = "0" exposes only CUDA device 0 to this process
#     (original note: "comment this line out to use the CPU, keep it to use the GPU").
# NOTE(review): TensorFlow is already imported above this point, so the
# log-level setting may not affect messages emitted during import -- confirm.
os.environ.update({
    'TF_CPP_MIN_LOG_LEVEL': '2',
    "CUDA_VISIBLE_DEVICES": "0",
})
38 |
39 |
40 |
41 |
42 |
43 |
44 | #测试集
def FULL_onehot_chem(Dropout1=0, Epochs=20, Batch_size=64):
    """Evaluate a fully connected network with 5-fold CV on the full TCRA feature array.

    Loads ``../../data/TCRA_train_feature_array.npy`` and the matching label
    array, uses every feature of each sample (reshaped to ``(29, 20, 2)``),
    trains a freshly initialised network on every fold, and prints the pooled
    confusion matrix together with accuracy, precision, recall, F1 and MCC
    computed over all held-out predictions.

    Args:
        Dropout1: Drop rate of the Dropout layer placed before the output unit.
        Epochs: Number of training epochs per fold.
        Batch_size: Mini-batch size passed to ``model.fit``.

    Returns:
        None. All results are written to stdout.
    """
    features = np.load("../../data/TCRA_train_feature_array.npy")
    labels = np.load("../../data/TCRA_train_label_array.npy")

    # Flatten every sample so KFold indices select whole samples; the network
    # input shape is restored per fold below.
    X = features.reshape(len(features), -1)
    # Column 1 is used as the target. Presumably a 0/1 label, given the
    # sigmoid + binary cross-entropy head -- TODO confirm against the
    # feature-extraction script.
    Y = labels[:, 1]

    sample_shape = (29, 20, 2)
    kf = KFold(n_splits=5, shuffle=True, random_state=0)
    held_out_truth = []
    held_out_pred = []

    for fold_no, (train_index, test_index) in enumerate(kf.split(X), start=1):
        # Extra in-fold shuffle kept from the original script. It is unseeded,
        # so the sample order inside a fold differs between runs even though
        # the fold membership itself is fixed by random_state=0.
        np.random.shuffle(train_index)
        np.random.shuffle(test_index)

        X_train = X[train_index].reshape((len(train_index),) + sample_shape).astype(np.float32)
        X_test = X[test_index].reshape((len(test_index),) + sample_shape).astype(np.float32)
        Y_train, Y_test = Y[train_index], Y[test_index]

        model = tf.keras.models.Sequential([
            tf.keras.layers.Flatten(input_shape=sample_shape),
            tf.keras.layers.Dense(256, activation='relu'),
            tf.keras.layers.Dense(512, activation='relu'),
            tf.keras.layers.Dense(256, activation='relu'),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(64, activation='relu'),
            # Dropout rate is a fraction in [0, 1) of inputs to drop.
            tf.keras.layers.Dropout(Dropout1),
            tf.keras.layers.Dense(1, activation='sigmoid'),
        ])
        model.compile(optimizer="Adam",
                      loss="binary_crossentropy",
                      metrics=['accuracy'])
        model.fit(X_train, Y_train, epochs=Epochs, batch_size=Batch_size, verbose=0)

        # Sequential.predict_classes() was removed in TensorFlow 2.6. For a
        # single sigmoid output it was defined as thresholding at 0.5, which is
        # reproduced here explicitly.
        probabilities = model.predict(X_test, verbose=0)
        Y_pred = (probabilities > 0.5).astype("int32").ravel()

        # Progress indicator: 1-based index of the fold that just finished.
        print(fold_no)

        held_out_truth.append(Y_test)
        held_out_pred.append(Y_pred)

        # Dropping the Python reference alone does not release Keras' global
        # state; clear it so memory does not grow across folds.
        del model
        tf.keras.backend.clear_session()

    Y_test_all = np.concatenate(held_out_truth, axis=0)
    Y_pred_all = np.concatenate(held_out_pred, axis=0)

    # Pooled confusion matrix in scikit-learn's layout: rows are true labels,
    # columns are predicted labels, labels sorted ascending. The original code
    # summed these same cells per fold under the names TP/FN/FP/TN, which only
    # matches this layout if the first label is treated as the positive class;
    # the printed layout is unchanged.
    total_cm = confusion_matrix(Y_test_all, Y_pred_all)
    print('\n\n总混淆矩阵')
    print(total_cm[0, 0], total_cm[0, 1])
    print(total_cm[1, 0], total_cm[1, 1])

    accuracy = accuracy_score(Y_test_all, Y_pred_all)
    precision = precision_score(Y_test_all, Y_pred_all)
    recall = recall_score(Y_test_all, Y_pred_all)
    f1 = f1_score(Y_test_all, Y_pred_all)
    MCC = matthews_corrcoef(Y_test_all, Y_pred_all)

    print('\n准确率ACC:',accuracy,
          '\n精确率precision:',precision,
          '\n召回率recall:',recall,
          '\nF1:',f1,
          '\nMCC:',MCC
          )
189 |
190 |
191 |
192 |
# Script entry point: cross-validate with dropout 0.3, 50 epochs, batch size 128.
FULL_onehot_chem(Dropout1=0.3, Epochs=50, Batch_size=128)
194 |
195 |
196 |
197 |
198 |
--------------------------------------------------------------------------------
/code/fold/FULL_A_fold_pca.py:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | import tensorflow as tf
5 |
6 | try:
7 | import tensorflow.python.keras as keras
8 | except:
9 | import tensorflow.keras as keras
10 |
11 | from tensorflow.python.keras import layers
12 | from tensorflow.python.keras import backend as K
13 |
14 | from sklearn.model_selection import KFold
15 |
16 | from sklearn import metrics
17 | from sklearn.metrics import accuracy_score,matthews_corrcoef,classification_report,confusion_matrix,precision_score,recall_score
18 | from sklearn.metrics import f1_score,roc_auc_score, auc
19 |
20 | from keras import regularizers
21 |
22 | import os
23 | import scipy.io as sio
24 | from sklearn.model_selection import train_test_split
25 | from sklearn.preprocessing import LabelEncoder
26 | import numpy as np
27 | from tensorflow.python.keras.callbacks import ReduceLROnPlateau
28 | from keras.utils import plot_model
29 |
30 |
31 |
32 |
33 | import matplotlib.pyplot as plt
34 |
35 | from tensorflow.python.keras.models import load_model
36 |
# Process-wide TensorFlow / CUDA settings.
#   TF_CPP_MIN_LOG_LEVEL = '2' raises the native TensorFlow log threshold
#     (original note: "show only error and warning messages; 3 shows errors only").
#   CUDA_VISIBLE_DEVICES = "0" exposes only CUDA device 0 to this process
#     (original note: "comment this line out to use the CPU, keep it to use the GPU").
# NOTE(review): TensorFlow is already imported above this point, so the
# log-level setting may not affect messages emitted during import -- confirm.
os.environ.update({
    'TF_CPP_MIN_LOG_LEVEL': '2',
    "CUDA_VISIBLE_DEVICES": "0",
})
39 |
40 |
41 |
42 |
43 |
44 |
45 | #测试集
def FULL_pca(Dropout1 = 0, Epochs = 20, Batch_size = 64, PCA_num = 18 ):
    """Evaluate a fully connected network with 5-fold CV on PCA-encoded TCRA data.

    Loads ``../../data/train_TCRA_PCA{PCA_num}_feature_array.npy`` and the
    matching label array, trains a freshly initialised network on every fold,
    and prints the pooled confusion matrix together with accuracy, precision,
    recall, F1 and MCC computed over all held-out predictions.

    Args:
        Dropout1: Drop rate of the Dropout layer placed before the output unit.
        Epochs: Number of training epochs per fold.
        Batch_size: Mini-batch size passed to ``model.fit``.
        PCA_num: PCA dimensionality; selects the input files and fixes the
            per-sample shape ``(20, PCA_num + 1, 2)``.

    Returns:
        None. All results are written to stdout.
    """
    features = np.load("../../data/train_TCRA_PCA{}_feature_array.npy".format(PCA_num))
    labels = np.load("../../data/train_TCRA_PCA{}_label_array.npy".format(PCA_num))
    print('\n\nPCA_NUM: {}'.format(PCA_num))
    print('Feature.shape: {}'.format(features.shape))

    # Flatten every sample so KFold indices select whole samples; the network
    # input shape is restored per fold below.
    X = features.reshape(len(features), -1)
    # Column 1 is used as the target. Presumably a 0/1 label, given the
    # sigmoid + binary cross-entropy head -- TODO confirm against the
    # feature-extraction script.
    Y = labels[:, 1]

    sample_shape = (20, PCA_num + 1, 2)
    kf = KFold(n_splits=5, shuffle=True, random_state=0)
    held_out_truth = []
    held_out_pred = []

    for train_index, test_index in kf.split(X):
        # Extra in-fold shuffle kept from the original script. It is unseeded,
        # so the sample order inside a fold differs between runs even though
        # the fold membership itself is fixed by random_state=0.
        np.random.shuffle(train_index)
        np.random.shuffle(test_index)

        X_train = X[train_index].reshape((len(train_index),) + sample_shape).astype(np.float32)
        X_test = X[test_index].reshape((len(test_index),) + sample_shape).astype(np.float32)
        Y_train, Y_test = Y[train_index], Y[test_index]

        model = tf.keras.models.Sequential([
            tf.keras.layers.Flatten(input_shape=sample_shape),
            tf.keras.layers.Dense(256, activation='relu'),
            tf.keras.layers.Dense(512, activation='relu'),
            tf.keras.layers.Dense(256, activation='relu'),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(64, activation='relu'),
            # Dropout rate is a fraction in [0, 1) of inputs to drop.
            tf.keras.layers.Dropout(Dropout1),
            tf.keras.layers.Dense(1, activation='sigmoid'),
        ])
        model.compile(optimizer="Adam",
                      loss="binary_crossentropy",
                      metrics=['accuracy'])
        model.fit(X_train, Y_train, epochs=Epochs, batch_size=Batch_size, verbose=0)

        # Sequential.predict_classes() was removed in TensorFlow 2.6. For a
        # single sigmoid output it was defined as thresholding at 0.5, which is
        # reproduced here explicitly.
        probabilities = model.predict(X_test, verbose=0)
        Y_pred = (probabilities > 0.5).astype("int32").ravel()

        held_out_truth.append(Y_test)
        held_out_pred.append(Y_pred)

        # Dropping the Python reference alone does not release Keras' global
        # state; clear it so memory does not grow across folds and repeated calls.
        del model
        tf.keras.backend.clear_session()

    Y_test_all = np.concatenate(held_out_truth, axis=0)
    Y_pred_all = np.concatenate(held_out_pred, axis=0)

    # Pooled confusion matrix in scikit-learn's layout: rows are true labels,
    # columns are predicted labels, labels sorted ascending. The original code
    # summed these same cells per fold under the names TP/FN/FP/TN, which only
    # matches this layout if the first label is treated as the positive class;
    # the printed layout is unchanged.
    total_cm = confusion_matrix(Y_test_all, Y_pred_all)
    print('总混淆矩阵')
    print(total_cm[0, 0], total_cm[0, 1])
    print(total_cm[1, 0], total_cm[1, 1])

    accuracy = accuracy_score(Y_test_all, Y_pred_all)
    precision = precision_score(Y_test_all, Y_pred_all)
    recall = recall_score(Y_test_all, Y_pred_all)
    f1 = f1_score(Y_test_all, Y_pred_all)
    MCC = matthews_corrcoef(Y_test_all, Y_pred_all)

    print('准确率ACC:',accuracy,
          '\n精确率precision:',precision,
          '\n召回率recall:',recall,
          '\nF1:',f1,
          '\nMCC:',MCC
          )
188 |
189 |
190 |
191 |
192 |
# Run the cross-validation once for every PCA dimensionality from 8 to 20
# inclusive, using dropout 0.3, 50 epochs and a batch size of 128.
for i in range(8, 21):
    FULL_pca(Dropout1=0.3, Epochs=50, Batch_size=128, PCA_num=i)
196 |
197 |
198 |
199 |
200 |
201 |
--------------------------------------------------------------------------------
/code/fold/FULL_B_fold_onehot.py:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | import tensorflow as tf
5 |
6 | try:
7 | import tensorflow.python.keras as keras
8 | except:
9 | import tensorflow.keras as keras
10 |
11 | from tensorflow.python.keras import layers
12 | from tensorflow.python.keras import backend as K
13 |
14 | from sklearn.model_selection import KFold
15 |
16 | from sklearn import metrics
17 | from sklearn.metrics import accuracy_score,matthews_corrcoef,classification_report,confusion_matrix,precision_score,recall_score
18 | from sklearn.metrics import f1_score,roc_auc_score, auc
19 |
20 | from keras import regularizers
21 |
22 | import os
23 | import scipy.io as sio
24 | from sklearn.model_selection import train_test_split
25 | from sklearn.preprocessing import LabelEncoder
26 | import numpy as np
27 | from tensorflow.python.keras.callbacks import ReduceLROnPlateau
28 | from keras.utils import plot_model
29 |
30 |
31 |
32 |
33 | import matplotlib.pyplot as plt
34 |
35 | from tensorflow.python.keras.models import load_model
36 |
# Process-wide TensorFlow / CUDA settings.
#   TF_CPP_MIN_LOG_LEVEL = '2' raises the native TensorFlow log threshold
#     (original note: "show only error and warning messages; 3 shows errors only").
#   CUDA_VISIBLE_DEVICES = "0" exposes only CUDA device 0 to this process
#     (original note: "comment this line out to use the CPU, keep it to use the GPU").
# NOTE(review): TensorFlow is already imported above this point, so the
# log-level setting may not affect messages emitted during import -- confirm.
os.environ.update({
    'TF_CPP_MIN_LOG_LEVEL': '2',
    "CUDA_VISIBLE_DEVICES": "0",
})
39 |
40 |
41 |
42 |
43 |
44 |
45 | #测试集
def FULL_onehot(Dropout1=0, Epochs=20, Batch_size=64):
    """Evaluate a fully connected network with 5-fold CV on one-hot TCRB features.

    Loads ``../../data/TCRB_train_feature_array.npy`` and the matching label
    array, keeps rows 0..28 of every sample (the one-hot part, per the original
    comment), trains a freshly initialised network on every fold, and prints
    the pooled confusion matrix together with accuracy, precision, recall, F1
    and MCC computed over all held-out predictions.

    Args:
        Dropout1: Drop rate of the Dropout layer placed before the output unit.
        Epochs: Number of training epochs per fold.
        Batch_size: Mini-batch size passed to ``model.fit``.

    Returns:
        None. All results are written to stdout.
    """
    features = np.load("../../data/TCRB_train_feature_array.npy")
    labels = np.load("../../data/TCRB_train_label_array.npy")

    # Keep only the first 29 rows of each sample (one-hot features), then
    # flatten so KFold indices select whole samples.
    one_hot = features[:, 0:29, :]
    X = one_hot.reshape(len(one_hot), -1)
    # Column 1 is used as the target. Presumably a 0/1 label, given the
    # sigmoid + binary cross-entropy head -- TODO confirm against the
    # feature-extraction script.
    Y = labels[:, 1]

    sample_shape = (29, 20, 1)
    kf = KFold(n_splits=5, shuffle=True, random_state=0)
    held_out_truth = []
    held_out_pred = []

    for train_index, test_index in kf.split(X):
        # Extra in-fold shuffle kept from the original script. It is unseeded,
        # so the sample order inside a fold differs between runs even though
        # the fold membership itself is fixed by random_state=0.
        np.random.shuffle(train_index)
        np.random.shuffle(test_index)

        X_train = X[train_index].reshape((len(train_index),) + sample_shape).astype(np.float32)
        X_test = X[test_index].reshape((len(test_index),) + sample_shape).astype(np.float32)
        Y_train, Y_test = Y[train_index], Y[test_index]

        model = tf.keras.models.Sequential([
            tf.keras.layers.Flatten(input_shape=sample_shape),
            tf.keras.layers.Dense(256, activation='relu'),
            tf.keras.layers.Dense(512, activation='relu'),
            tf.keras.layers.Dense(256, activation='relu'),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(64, activation='relu'),
            # Dropout rate is a fraction in [0, 1) of inputs to drop.
            tf.keras.layers.Dropout(Dropout1),
            tf.keras.layers.Dense(1, activation='sigmoid'),
        ])
        model.compile(optimizer="Adam",
                      loss="binary_crossentropy",
                      metrics=['accuracy'])
        model.fit(X_train, Y_train, epochs=Epochs, batch_size=Batch_size, verbose=0)

        # Sequential.predict_classes() was removed in TensorFlow 2.6. For a
        # single sigmoid output it was defined as thresholding at 0.5, which is
        # reproduced here explicitly.
        probabilities = model.predict(X_test, verbose=0)
        Y_pred = (probabilities > 0.5).astype("int32").ravel()

        held_out_truth.append(Y_test)
        held_out_pred.append(Y_pred)

        # Dropping the Python reference alone does not release Keras' global
        # state; clear it so memory does not grow across folds.
        del model
        tf.keras.backend.clear_session()

    Y_test_all = np.concatenate(held_out_truth, axis=0)
    Y_pred_all = np.concatenate(held_out_pred, axis=0)

    # Pooled confusion matrix in scikit-learn's layout: rows are true labels,
    # columns are predicted labels, labels sorted ascending. The original code
    # summed these same cells per fold under the names TP/FN/FP/TN, which only
    # matches this layout if the first label is treated as the positive class;
    # the printed layout is unchanged.
    total_cm = confusion_matrix(Y_test_all, Y_pred_all)
    print('\n\n总混淆矩阵')
    print(total_cm[0, 0], total_cm[0, 1])
    print(total_cm[1, 0], total_cm[1, 1])

    accuracy = accuracy_score(Y_test_all, Y_pred_all)
    precision = precision_score(Y_test_all, Y_pred_all)
    recall = recall_score(Y_test_all, Y_pred_all)
    f1 = f1_score(Y_test_all, Y_pred_all)
    MCC = matthews_corrcoef(Y_test_all, Y_pred_all)

    print('准确率ACC:',accuracy,
          '\n精确率precision:',precision,
          '\n召回率recall:',recall,
          '\nF1:',f1,
          '\nMCC:',MCC
          )
190 |
191 |
192 |
193 |
194 |
# Script entry point: cross-validate with dropout 0.3, 50 epochs, batch size 128.
FULL_onehot(Dropout1=0.3, Epochs=50, Batch_size=128)
196 |
197 |
198 |
199 |
200 |
201 |
--------------------------------------------------------------------------------
/code/fold/FULL_B_fold_onehot_chem.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | import tensorflow as tf
4 |
5 | try:
6 | import tensorflow.python.keras as keras
7 | except:
8 | import tensorflow.keras as keras
9 |
10 | from tensorflow.python.keras import layers
11 | from tensorflow.python.keras import backend as K
12 |
13 | from sklearn.model_selection import KFold
14 |
15 | from sklearn import metrics
16 | from sklearn.metrics import accuracy_score,matthews_corrcoef,classification_report,confusion_matrix,precision_score,recall_score
17 | from sklearn.metrics import f1_score,roc_auc_score, auc
18 |
19 | from keras import regularizers
20 |
21 | import os
22 | import scipy.io as sio
23 | from sklearn.model_selection import train_test_split
24 | from sklearn.preprocessing import LabelEncoder
25 | import numpy as np
26 | from tensorflow.python.keras.callbacks import ReduceLROnPlateau
27 | from keras.utils import plot_model
28 |
29 |
30 |
31 |
32 | import matplotlib.pyplot as plt
33 |
34 | from tensorflow.python.keras.models import load_model
35 |
# Process-wide TensorFlow / CUDA settings.
#   TF_CPP_MIN_LOG_LEVEL = '2' raises the native TensorFlow log threshold
#     (original note: "show only error and warning messages; 3 shows errors only").
#   CUDA_VISIBLE_DEVICES = "0" exposes only CUDA device 0 to this process
#     (original note: "comment this line out to use the CPU, keep it to use the GPU").
# NOTE(review): TensorFlow is already imported above this point, so the
# log-level setting may not affect messages emitted during import -- confirm.
os.environ.update({
    'TF_CPP_MIN_LOG_LEVEL': '2',
    "CUDA_VISIBLE_DEVICES": "0",
})
38 |
39 |
40 | # In[ ]:
41 |
42 |
43 |
44 |
45 |
46 | # In[2]:
47 |
48 |
49 |
50 |
51 | #测试集
def FULL_onehot_chem(Dropout1=0, Epochs=20, Batch_size=64):
    """Evaluate a fully connected network with 5-fold CV on the full TCRB feature array.

    Loads ``../../data/TCRB_train_feature_array.npy`` and the matching label
    array, uses every feature of each sample (reshaped to ``(29, 20, 2)``),
    trains a freshly initialised network on every fold, and prints the pooled
    confusion matrix together with accuracy, precision, recall, F1 and MCC
    computed over all held-out predictions.

    Args:
        Dropout1: Drop rate of the Dropout layer placed before the output unit.
        Epochs: Number of training epochs per fold.
        Batch_size: Mini-batch size passed to ``model.fit``.

    Returns:
        None. All results are written to stdout.
    """
    features = np.load("../../data/TCRB_train_feature_array.npy")
    labels = np.load("../../data/TCRB_train_label_array.npy")

    # Flatten every sample so KFold indices select whole samples; the network
    # input shape is restored per fold below.
    X = features.reshape(len(features), -1)
    # Column 1 is used as the target. Presumably a 0/1 label, given the
    # sigmoid + binary cross-entropy head -- TODO confirm against the
    # feature-extraction script.
    Y = labels[:, 1]

    sample_shape = (29, 20, 2)
    kf = KFold(n_splits=5, shuffle=True, random_state=0)
    held_out_truth = []
    held_out_pred = []

    for train_index, test_index in kf.split(X):
        # Extra in-fold shuffle kept from the original script. It is unseeded,
        # so the sample order inside a fold differs between runs even though
        # the fold membership itself is fixed by random_state=0.
        np.random.shuffle(train_index)
        np.random.shuffle(test_index)

        X_train = X[train_index].reshape((len(train_index),) + sample_shape).astype(np.float32)
        X_test = X[test_index].reshape((len(test_index),) + sample_shape).astype(np.float32)
        Y_train, Y_test = Y[train_index], Y[test_index]

        model = tf.keras.models.Sequential([
            tf.keras.layers.Flatten(input_shape=sample_shape),
            tf.keras.layers.Dense(256, activation='relu'),
            tf.keras.layers.Dense(512, activation='relu'),
            tf.keras.layers.Dense(256, activation='relu'),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(64, activation='relu'),
            # Dropout rate is a fraction in [0, 1) of inputs to drop.
            tf.keras.layers.Dropout(Dropout1),
            tf.keras.layers.Dense(1, activation='sigmoid'),
        ])
        model.compile(optimizer="Adam",
                      loss="binary_crossentropy",
                      metrics=['accuracy'])
        model.fit(X_train, Y_train, epochs=Epochs, batch_size=Batch_size, verbose=0)

        # Sequential.predict_classes() was removed in TensorFlow 2.6. For a
        # single sigmoid output it was defined as thresholding at 0.5, which is
        # reproduced here explicitly.
        probabilities = model.predict(X_test, verbose=0)
        Y_pred = (probabilities > 0.5).astype("int32").ravel()

        held_out_truth.append(Y_test)
        held_out_pred.append(Y_pred)

        # Dropping the Python reference alone does not release Keras' global
        # state; clear it so memory does not grow across folds.
        del model
        tf.keras.backend.clear_session()

    Y_test_all = np.concatenate(held_out_truth, axis=0)
    Y_pred_all = np.concatenate(held_out_pred, axis=0)

    # Pooled confusion matrix in scikit-learn's layout: rows are true labels,
    # columns are predicted labels, labels sorted ascending. The original code
    # summed these same cells per fold under the names TP/FN/FP/TN, which only
    # matches this layout if the first label is treated as the positive class;
    # the printed layout is unchanged.
    total_cm = confusion_matrix(Y_test_all, Y_pred_all)
    print('\n\n总混淆矩阵')
    print(total_cm[0, 0], total_cm[0, 1])
    print(total_cm[1, 0], total_cm[1, 1])

    accuracy = accuracy_score(Y_test_all, Y_pred_all)
    precision = precision_score(Y_test_all, Y_pred_all)
    recall = recall_score(Y_test_all, Y_pred_all)
    f1 = f1_score(Y_test_all, Y_pred_all)
    MCC = matthews_corrcoef(Y_test_all, Y_pred_all)

    print('准确率ACC:',accuracy,
          '\n精确率precision:',precision,
          '\n召回率recall:',recall,
          '\nF1:',f1,
          '\nMCC:',MCC
          )
197 |
198 |
199 |
200 |
201 | FULL_onehot_chem(0.3,50,128)
202 |
203 |
204 |
205 |
206 |
207 |
--------------------------------------------------------------------------------
/code/fold/FULL_B_fold_pca.py:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | import tensorflow as tf
5 |
6 | try:
7 | import tensorflow.python.keras as keras
8 | except:
9 | import tensorflow.keras as keras
10 |
11 | from tensorflow.python.keras import layers
12 | from tensorflow.python.keras import backend as K
13 |
14 | from sklearn.model_selection import KFold
15 |
16 | from sklearn import metrics
17 | from sklearn.metrics import accuracy_score,matthews_corrcoef,classification_report,confusion_matrix,precision_score,recall_score
18 | from sklearn.metrics import f1_score,roc_auc_score, auc
19 |
20 | from keras import regularizers
21 |
22 | import os
23 | import scipy.io as sio
24 | from sklearn.model_selection import train_test_split
25 | from sklearn.preprocessing import LabelEncoder
26 | import numpy as np
27 | from tensorflow.python.keras.callbacks import ReduceLROnPlateau
28 | from keras.utils import plot_model
29 |
30 |
31 |
32 |
33 | import matplotlib.pyplot as plt
34 |
35 | from tensorflow.python.keras.models import load_model
36 |
37 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # 只显示error和warining信息 3 只显示error信息
38 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" # 这一行注释掉就是使用cpu,不注释就是使用gpu
39 |
40 |
41 |
42 |
43 |
44 |
45 | #测试集
def FULL_pca(Dropout1 = 0, Epochs = 20, Batch_size = 64, PCA_num = 18 ):
    """Run 5-fold cross-validation of the fully connected network on PCA features.

    For every fold a fresh dense network (sigmoid output, binary
    cross-entropy loss, Adam optimizer) is trained on the training split and
    evaluated on the held-out split.  Predictions of all folds are pooled and
    the summed confusion matrix, accuracy, precision, recall, F1 and MCC are
    printed.

    Args:
        Dropout1: Dropout rate (between 0 and 1) applied before the output layer.
        Epochs: Number of training epochs per fold.
        Batch_size: Mini-batch size passed to ``model.fit``.
        PCA_num: PCA dimensionality; selects which ``.npy`` files are loaded
            and fixes the per-sample shape to ``(20, PCA_num + 1, 2)``.

    Returns:
        None. All results are written to stdout.
    """
    # NOTE(review): this file is FULL_B_fold_pca.py but the TCRA arrays are
    # loaded here -- confirm whether the TCRB arrays were intended.
    feature_array = np.load("../../data/train_TCRA_PCA{}_feature_array.npy".format(PCA_num))
    label_array = np.load("../../data/train_TCRA_PCA{}_label_array.npy".format(PCA_num))
    print('\n\nPCA_NUM: {}'.format(PCA_num))
    print('Feature.shape: {}'.format(feature_array.shape))

    # Samples are flattened for fold indexing and reshaped again per fold.
    X = feature_array.reshape(len(feature_array), -1)
    # Column 1 of the label array is used as the binary target.
    Y = label_array[:, 1]

    kf = KFold(n_splits=5, shuffle=True, random_state=0)
    TN = FP = FN = TP = 0
    y_true_folds = []
    y_pred_folds = []

    for train_index, test_index in kf.split(X):
        np.random.shuffle(train_index)
        np.random.shuffle(test_index)

        X_train, X_test = X[train_index], X[test_index]
        Y_train, Y_test = Y[train_index], Y[test_index]

        X_train = X_train.reshape([len(X_train), 20, PCA_num + 1, 2])
        X_test = X_test.reshape([len(X_test), 20, PCA_num + 1, 2])
        X_test = tf.cast(X_test, tf.float32)

        model = tf.keras.models.Sequential([
            tf.keras.layers.Flatten(input_shape=(20, PCA_num + 1, 2)),
            tf.keras.layers.Dense(256, activation='relu'),
            tf.keras.layers.Dense(512, activation='relu'),
            tf.keras.layers.Dense(256, activation='relu'),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(64, activation='relu'),
            # Dropout rate: fraction of the inputs to drop, between 0 and 1.
            tf.keras.layers.Dropout(Dropout1),
            tf.keras.layers.Dense(1, activation='sigmoid'),
        ])

        model.compile(optimizer="Adam",
                      loss=keras.losses.binary_crossentropy,
                      metrics=['accuracy'])
        model.fit(X_train, Y_train, epochs=Epochs, batch_size=Batch_size, verbose=0)

        # Sequential.predict_classes() no longer exists in recent TensorFlow
        # releases; thresholding the sigmoid output at 0.5 is the equivalent.
        Y_pred = (model.predict(X_test) > 0.5).astype("int32").ravel()

        # labels=[0, 1] keeps the matrix 2x2 even if a fold contains one class.
        fold_cm = confusion_matrix(Y_test, Y_pred, labels=[0, 1])

        # NOTE(review): sklearn lays the matrix out as rows = true label,
        # columns = predicted label, so index [0, 0] counts label 0.  These
        # counters therefore treat label 0 as the positive class, while
        # precision/recall/F1 below use sklearn's default pos_label=1.
        # Confirm which class is meant to be positive.
        TP += fold_cm[0, 0]
        FN += fold_cm[0, 1]
        FP += fold_cm[1, 0]
        TN += fold_cm[1, 1]

        y_true_folds.append(Y_test)
        y_pred_folds.append(Y_pred)

        del model

    Y_test_all = np.concatenate(y_true_folds, axis=0)
    Y_pred_all = np.concatenate(y_pred_folds, axis=0)

    print('总混淆矩阵')
    print(TP,FN)
    print(FP,TN)

    accuracy = accuracy_score(Y_test_all, Y_pred_all)    # accuracy
    precision = precision_score(Y_test_all, Y_pred_all)  # precision
    recall = recall_score(Y_test_all, Y_pred_all)        # recall
    f1 = f1_score(Y_test_all, Y_pred_all)                # F1
    MCC = matthews_corrcoef(Y_test_all, Y_pred_all)      # Matthews correlation coefficient

    print('准确率ACC:',accuracy,
          '\n精确率precision:',precision,
          '\n召回率recall:',recall,
          '\nF1:',f1,
          '\nMCC:',MCC
          )
188 |
189 |
190 |
# Run the cross-validation once for each PCA dimensionality from 8 to 20 inclusive.
for pca_dim in range(8, 21):
    FULL_pca(Dropout1=0.3, Epochs=50, Batch_size=128, PCA_num=pca_dim)
194 |
195 |
--------------------------------------------------------------------------------
/code/train/CNN_A_ALL_onehot.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | try:
4 | import tensorflow.python.keras as keras
5 | except:
6 | import tensorflow.keras as keras
7 |
8 | from tensorflow.python.keras import layers
9 | from tensorflow.python.keras import backend as K
10 |
11 | from sklearn.model_selection import KFold
12 |
13 | from sklearn import metrics
14 | from sklearn.metrics import accuracy_score,matthews_corrcoef,classification_report,confusion_matrix,precision_score,recall_score
15 | from sklearn.metrics import f1_score,roc_auc_score, auc
16 |
17 | from keras import regularizers
18 |
19 | import os
20 | import scipy.io as sio
21 | from sklearn.model_selection import train_test_split
22 | from sklearn.preprocessing import LabelEncoder
23 | import numpy as np
24 | from tensorflow.python.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
25 | from keras.utils import plot_model
26 | #from tensorflow.keras.callbacks import LearningRateScheduler, ReduceLROnPlateau, ModelCheckpoint
27 |
28 |
29 |
30 | import matplotlib.pyplot as plt
31 | from sklearn.utils import shuffle
32 |
33 | from tensorflow.python.keras.models import load_model
34 |
35 |
36 | import matplotlib.pyplot as plt
37 |
38 | import csv
39 | import pandas as pd
40 |
41 |
42 |
43 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # 只显示error和warining信息 3 只显示error信息
44 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" # 这一行注释掉就是使用cpu,不注释就是使用gpu
45 |
46 |
47 |
def CNN_onehot(modelfile,Dropout1=0,Epochs= 20,Batch_size=64,):
    """Train the CNN on one-hot encoded TCRA features and checkpoint the best model.

    The first 29 rows of every feature matrix are used as a ``29 x 20 x 1``
    input.  The network ends in a 2-unit softmax and is trained with binary
    cross-entropy and the Adam optimizer.  After every epoch the model is
    saved to ``modelfile`` if the validation loss improved.

    Args:
        modelfile: Path of the ``.h5`` checkpoint file to write.
        Dropout1: Dropout rate (between 0 and 1) applied before the output layer.
        Epochs: Number of training epochs.
        Batch_size: Mini-batch size passed to ``model.fit``.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    train_Feature = np.load("../../data/TCRA_train_feature_array.npy")
    train_Label = np.load("../../data/TCRA_train_label_array.npy")

    test_Feature = np.load("../../data/TCRA_test_feature_array.npy")
    test_Label = np.load("../../data/TCRA_test_label_array.npy")

    X_train, Y_train = shuffle(train_Feature[:, 0:29, :], train_Label)
    X_test, Y_test = shuffle(test_Feature[:, 0:29, :], test_Label)

    X_train = X_train.reshape([len(X_train), 29, 20, 1])
    X_test = X_test.reshape([len(X_test), 29, 20, 1])
    X_test = tf.cast(X_test, tf.float32)

    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(64, (5, 5), padding='same', input_shape=(29, 20, 1), activation='relu'),
        tf.keras.layers.AveragePooling2D(2, 2),
        tf.keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'),
        tf.keras.layers.AveragePooling2D(2, 2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        # Dropout rate: fraction of the inputs to drop, between 0 and 1.
        tf.keras.layers.Dropout(Dropout1),
        tf.keras.layers.Dense(2, activation='softmax'),
    ])

    model.compile(optimizer="Adam",
                  loss=keras.losses.binary_crossentropy,
                  metrics=['accuracy'])

    # Make sure the checkpoint directory exists before Keras tries to write
    # into it (some Keras versions do not create it themselves).
    checkpoint_dir = os.path.dirname(modelfile)
    if checkpoint_dir:
        os.makedirs(checkpoint_dir, exist_ok=True)

    # NOTE(review): the test split doubles as validation data, so the saved
    # "best" checkpoint is selected on the same data it is later scored on.
    checkpoint = ModelCheckpoint(filepath=modelfile,
                                 monitor='val_loss',
                                 verbose=0,
                                 save_best_only=True)
    cbs = [checkpoint]
    history = model.fit(X_train,
                        Y_train,
                        epochs=Epochs,
                        batch_size=Batch_size,
                        verbose=0,
                        validation_data=(X_test, Y_test),
                        shuffle=False,
                        callbacks=cbs)
    # The statement that used to follow this return could never run and was removed.
    return history
116 |
117 |
118 |
119 |
# Train 50 independent models.  Each run's per-epoch validation accuracy is
# written as one CSV row; previously the file was opened but never written
# to or closed.  The context manager closes the file even if a run fails.
with open("CNN_A_ALL_onehot_test_ACC.csv", "w" , newline='') as csvFile:
    csv_writer = csv.writer(csvFile)

    for model_number in range(1,51):
        # Progress indicator: number of the model currently being trained.
        print(model_number)
        modelfile = './model/CNN_A_ALL_onehot_{}.h5'.format(model_number)
        history = CNN_onehot(modelfile,0.3,50,128)

        test_row = history.history['val_accuracy']
        csv_writer.writerow(test_row)
133 |
134 |
135 |
136 |
137 |
def computing_result(Feature_array,Label_array,model):
    """Evaluate ``model`` on a labelled data set and return its metrics.

    Class indices are obtained with ``argmax`` over the last axis of both the
    predictions and the labels; class 1 is treated as the positive class.

    Args:
        Feature_array: Model input passed unchanged to ``model.predict``.
        Label_array: Two-column label array; column 1 is used as the ROC target.
        model: Object exposing ``predict`` that returns two-column scores.

    Returns:
        Tuple ``(confusion, accuracy, precision, recall, f1, MCC, fpr, tpr,
        roc_auc)`` where ``confusion`` is ``[[TP, FN], [FP, TN]]``.
    """
    Y_PRED = model.predict(Feature_array)

    Y_pred2 = np.argmax(Y_PRED, axis=-1)
    Y_test2 = np.argmax(Label_array, axis=-1)

    # labels=[0, 1] guarantees a 2x2 matrix even when only one class occurs,
    # so the index-based re-ordering below cannot raise IndexError.
    confusion_matrix1 = confusion_matrix(Y_test2, Y_pred2, labels=[0, 1])

    # sklearn orders the matrix [[TN, FP], [FN, TP]]; re-order to [[TP, FN], [FP, TN]].
    new_confusion_matrix1 = [[confusion_matrix1[1,1],confusion_matrix1[1,0]],
                             [confusion_matrix1[0,1],confusion_matrix1[0,0]]]
    accuracy = accuracy_score(Y_test2, Y_pred2)
    precision = precision_score(Y_test2, Y_pred2)
    recall = recall_score(Y_test2, Y_pred2)
    f1 = f1_score(Y_test2, Y_pred2)
    MCC = matthews_corrcoef(Y_test2, Y_pred2)

    print('Y_TEST',Label_array[:,1].shape)
    print('Y_PRED',Y_PRED[:,1].shape)

    # ROC is computed from the class-1 score against the class-1 label column.
    fpr, tpr, thresholds = metrics.roc_curve(Label_array[:,1], Y_PRED[:,1])
    print('fpr',fpr.shape)
    print('tpr',tpr.shape)
    roc_auc = auc(fpr, tpr)

    return new_confusion_matrix1,accuracy,precision,recall,f1,MCC,fpr,tpr,roc_auc
168 |
def roc_plot(fpr,tpr,roc_auc):
    """Draw a ROC curve with its AUC in the legend and show the figure.

    Args:
        fpr: False-positive rates (x values of the curve).
        tpr: True-positive rates (y values of the curve).
        roc_auc: Area under the curve, printed with two decimals in the legend.
    """
    legend_text = 'AUC = %0.2f'% roc_auc

    # Model curve in blue, then the dashed red chance diagonal.
    plt.plot(fpr, tpr, 'b', label=legend_text)
    plt.legend(loc='lower right')
    plt.plot([0, 1], [0, 1], 'r--')

    # Axis decoration.
    plt.title('Receiver operating characteristic')
    plt.xlabel('False Positive Rate')   # x axis: fpr
    plt.ylabel('True Positive Rate')    # y axis: tpr
    plt.xlim([-0.01, 1.01])
    plt.ylim([0, 1.01])

    plt.show()
180 |
181 |
182 |
183 |
184 |
def writeMetrics(metricsFile,new_confusion_matrix1,accuracy,precision,recall,f1,MCC,roc_auc,noteInfo=''):
    """Append a plain-text metrics report to ``metricsFile``.

    Args:
        metricsFile: Path of the report file; it is opened in append mode.
        new_confusion_matrix1: Two-row confusion matrix; each row is written
            on its own line using ``str()``.
        accuracy, precision, recall, f1, MCC, roc_auc: Numeric metric values,
            each written with ``%f`` formatting.
        noteInfo: Optional heading written before the report.
    """
    # UTF-8 is requested explicitly because the report labels are non-ASCII
    # and the platform default encoding may not be able to represent them.
    with open(metricsFile,'a',encoding='utf-8') as fw:
        if noteInfo:
            fw.write('\n\n' + noteInfo + '\n')
        # write() accepts exactly one string; the original passed four
        # arguments, which raises TypeError.
        fw.write('混淆矩阵\n' + str(new_confusion_matrix1[0]) + '\n' + str(new_confusion_matrix1[1]))
        fw.write('\n准确率ACC:: %f '%accuracy)
        fw.write('\n精确率precision: %f '%precision)
        fw.write('\n召回率recall: %f '%recall)
        fw.write('\nF1: %f '%f1)
        fw.write('\nMCC: %f '%MCC)
        fw.write('\nAUC: %f '%roc_auc)
197 |
198 |
199 |
200 |
fileHeader =['model_number','dataset','TP','FN','FP','TN','ACC','precision','recall','f1','MCC','AUC']

# The test set is the same for every model, so it is loaded and reshaped once
# instead of on every loop iteration.
test_Feature = np.load("../../data/TCRA_test_feature_array.npy")
test_Label = np.load("../../data/TCRA_test_label_array.npy")

X_test = test_Feature[:,0:29,:]
Y_test = test_Label
X_test = X_test.reshape([len(X_test),29,20,1])

# Score each of the 50 saved models and write one CSV row per model.
# The context manager closes the file even if loading or scoring fails.
with open("CNN_A_ALL_onehot_result50.csv", "w" , newline='') as csvFile:
    csv_writer = csv.writer(csvFile)
    csv_writer.writerow(fileHeader)

    for model_number in range(1,51):
        modelfile = './model/CNN_A_ALL_onehot_{}.h5'.format(model_number)
        model = load_model(modelfile)

        test_CM,accuracy1,precision1,recall1,f11,MCC1,fpr1,tpr1,roc_auc1 = computing_result(X_test,Y_test,model)

        # roc_plot(fpr1,tpr1,roc_auc1) can be called here to inspect a single model.

        test_row = [model_number,'TEST',
                    test_CM[0][0],test_CM[0][1],
                    test_CM[1][0],test_CM[1][1],
                    accuracy1,precision1,recall1,f11,MCC1,roc_auc1]
        csv_writer.writerow(test_row)

        del model
251 |
252 |
--------------------------------------------------------------------------------
/code/train/CNN_B_ALL_pca20.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 |
5 |
6 | import tensorflow as tf
7 |
8 | try:
9 | import tensorflow.python.keras as keras
10 | except:
11 | import tensorflow.keras as keras
12 |
13 | from tensorflow.python.keras import layers
14 | from tensorflow.python.keras import backend as K
15 |
16 | from sklearn.model_selection import KFold
17 |
18 | from sklearn import metrics
19 | from sklearn.metrics import accuracy_score,matthews_corrcoef,classification_report,confusion_matrix,precision_score,recall_score
20 | from sklearn.metrics import f1_score,roc_auc_score, auc
21 |
22 | from keras import regularizers
23 |
24 | import os
25 | import scipy.io as sio
26 | from sklearn.model_selection import train_test_split
27 | from sklearn.preprocessing import LabelEncoder
28 | import numpy as np
29 | from tensorflow.python.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
30 | from keras.utils import plot_model
31 | #from tensorflow.keras.callbacks import LearningRateScheduler, ReduceLROnPlateau, ModelCheckpoint
32 |
33 |
34 |
35 | import matplotlib.pyplot as plt
36 | from sklearn.utils import shuffle
37 |
38 | from tensorflow.python.keras.models import load_model
39 |
40 |
41 | import matplotlib.pyplot as plt
42 |
43 | import csv
44 | import pandas as pd
45 |
46 |
47 |
48 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # 只显示error和warining信息 3 只显示error信息
49 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" # 这一行注释掉就是使用cpu,不注释就是使用gpu
50 |
51 |
52 |
def CNN_pca20(modelfile,Dropout1=0,Epochs= 20,Batch_size=64,PCA_num = 20):
    """Train the CNN on PCA-encoded TCRB features and checkpoint the best model.

    Every sample is reshaped to ``(20, PCA_num + 1, 2)``.  The network ends
    in a 2-unit softmax and is trained with binary cross-entropy and the Adam
    optimizer.  After every epoch the model is saved to ``modelfile`` if the
    validation loss improved.

    Args:
        modelfile: Path of the ``.h5`` checkpoint file to write.
        Dropout1: Dropout rate (between 0 and 1) applied before the output layer.
        Epochs: Number of training epochs.
        Batch_size: Mini-batch size passed to ``model.fit``.
        PCA_num: PCA dimensionality; selects which ``.npy`` files are loaded.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    train_Feature = np.load("../../data/train_TCRB_PCA{}_feature_array.npy".format(PCA_num))
    train_Label = np.load("../../data/train_TCRB_PCA{}_label_array.npy".format(PCA_num))

    test_Feature = np.load("../../data/test_TCRB_PCA{}_feature_array.npy".format(PCA_num))
    test_Label = np.load("../../data/test_TCRB_PCA{}_label_array.npy".format(PCA_num))

    X_train, Y_train = shuffle(train_Feature, train_Label)
    X_test, Y_test = shuffle(test_Feature, test_Label)

    sample_shape = (20, PCA_num + 1, 2)
    X_train = X_train.reshape([len(X_train)] + list(sample_shape))
    X_test = X_test.reshape([len(X_test)] + list(sample_shape))
    X_test = tf.cast(X_test, tf.float32)

    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(64, (5, 5), padding='same', input_shape=sample_shape, activation='relu'),
        tf.keras.layers.AveragePooling2D(2, 2),
        tf.keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'),
        tf.keras.layers.AveragePooling2D(2, 2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        # Dropout rate: fraction of the inputs to drop, between 0 and 1.
        tf.keras.layers.Dropout(Dropout1),
        tf.keras.layers.Dense(2, activation='softmax'),
    ])

    model.compile(optimizer="Adam",
                  loss=keras.losses.binary_crossentropy,
                  metrics=['accuracy'])

    # Make sure the checkpoint directory exists before Keras tries to write
    # into it (some Keras versions do not create it themselves).
    checkpoint_dir = os.path.dirname(modelfile)
    if checkpoint_dir:
        os.makedirs(checkpoint_dir, exist_ok=True)

    # NOTE(review): the test split doubles as validation data, so the saved
    # "best" checkpoint is selected on the same data it is later scored on.
    checkpoint = ModelCheckpoint(filepath=modelfile,
                                 monitor='val_loss',
                                 verbose=0,
                                 save_best_only=True)
    cbs = [checkpoint]
    history = model.fit(X_train,
                        Y_train,
                        epochs=Epochs,
                        batch_size=Batch_size,
                        verbose=0,
                        # Fixed: the original passed the undefined names
                        # x_test / y_test, which raises NameError.
                        validation_data=(X_test, Y_test),
                        shuffle=False,
                        callbacks=cbs)
    # The statement that used to follow this return could never run and was removed.
    return history
121 |
122 |
123 |
124 |
125 |
# Train 50 independent models and write each run's per-epoch validation
# accuracy as one CSV row.  The context manager closes the file even if a
# training run raises.
with open("CNN_B_ALL_pca20_test_ACC.csv", "w" , newline='') as csvFile:
    csv_writer = csv.writer(csvFile)

    for model_number in range(1,51):
        print(model_number)
        modelfile = './model/CNN_B_ALL_pca20_plt_{}.h5'.format(model_number)
        history = CNN_pca20(modelfile,0.3,300,128,20)

        test_row = history.history['val_accuracy']
        csv_writer.writerow(test_row)
142 |
143 |
144 |
145 |
146 |
147 |
def computing_result(Feature_array,Label_array,model):
    """Evaluate ``model`` on a labelled data set and return its metrics.

    Class indices are obtained with ``argmin`` over the last axis of both the
    predictions and the labels, so a sample is assigned class 1 when column 0
    holds the larger value.

    Args:
        Feature_array: Model input passed unchanged to ``model.predict``.
        Label_array: Two-column label array; column 1 is used as the ROC target.
        model: Object exposing ``predict`` that returns two-column scores.

    Returns:
        Tuple ``(confusion, accuracy, precision, recall, f1, MCC, fpr, tpr,
        roc_auc)`` where ``confusion`` is ``[[cm[1,1], cm[1,0]], [cm[0,1],
        cm[0,0]]]`` of the sklearn confusion matrix ``cm``.
    """
    Y_PRED = model.predict(Feature_array)

    # NOTE(review): CNN_A_ALL_onehot.py uses argmax here.  argmin is kept
    # because it may compensate for a different label column order in the PCA
    # arrays -- confirm against the PCA feature-extraction script.
    Y_pred2 = np.argmin(Y_PRED, axis=-1)
    Y_test2 = np.argmin(Label_array, axis=-1)

    # labels=[0, 1] guarantees a 2x2 matrix even when only one class occurs,
    # so the index-based re-ordering below cannot raise IndexError.
    confusion_matrix1 = confusion_matrix(Y_test2, Y_pred2, labels=[0, 1])

    new_confusion_matrix1 = [[confusion_matrix1[1,1],confusion_matrix1[1,0]],
                             [confusion_matrix1[0,1],confusion_matrix1[0,0]]]
    accuracy = accuracy_score(Y_test2, Y_pred2)
    precision = precision_score(Y_test2, Y_pred2)
    recall = recall_score(Y_test2, Y_pred2)
    f1 = f1_score(Y_test2, Y_pred2)
    MCC = matthews_corrcoef(Y_test2, Y_pred2)

    # ROC is computed from the column-1 score against the column-1 label.
    fpr, tpr, thresholds = metrics.roc_curve(Label_array[:,1], Y_PRED[:,1])
    roc_auc = auc(fpr, tpr)

    return new_confusion_matrix1,accuracy,precision,recall,f1,MCC,fpr,tpr,roc_auc
175 |
def roc_plot(fpr,tpr,roc_auc):
    """Plot a ROC curve, annotate it with the AUC and display the figure.

    Args:
        fpr: False-positive rates (x values of the curve).
        tpr: True-positive rates (y values of the curve).
        roc_auc: Area under the curve, shown with two decimals in the legend.
    """
    auc_label = 'AUC = %0.2f'% roc_auc
    chance_line = ([0, 1], [0, 1])

    plt.plot(fpr, tpr, 'b', label=auc_label)
    plt.legend(loc='lower right')
    # Dashed red diagonal marks the chance level.
    plt.plot(chance_line[0], chance_line[1], 'r--')

    plt.xlim([-0.01, 1.01])
    plt.ylim([0, 1])
    plt.title('Receiver operating characteristic')
    plt.ylabel('True Positive Rate')    # y axis: tpr
    plt.xlabel('False Positive Rate')   # x axis: fpr
    plt.show()
187 |
188 |
189 |
190 |
191 |
192 |
def writeMetrics(metricsFile,new_confusion_matrix1,accuracy,precision,recall,f1,MCC,roc_auc,noteInfo=''):
    """Append a plain-text metrics report to ``metricsFile``.

    Args:
        metricsFile: Path of the report file; it is opened in append mode.
        new_confusion_matrix1: Two-row confusion matrix; each row is written
            on its own line using ``str()``.
        accuracy, precision, recall, f1, MCC, roc_auc: Numeric metric values,
            each written with ``%f`` formatting.
        noteInfo: Optional heading written before the report.
    """
    report_parts = []
    if noteInfo:
        report_parts.append('\n\n' + noteInfo + '\n')
    # The original passed four arguments to write(), which raises TypeError;
    # the matrix rows are now converted to text and joined into the report.
    report_parts.append('混淆矩阵\n')
    report_parts.append(str(new_confusion_matrix1[0]))
    report_parts.append('\n')
    report_parts.append(str(new_confusion_matrix1[1]))
    report_parts.append('\n准确率ACC:: %f '%accuracy)
    report_parts.append('\n精确率precision: %f '%precision)
    report_parts.append('\n召回率recall: %f '%recall)
    report_parts.append('\nF1: %f '%f1)
    report_parts.append('\nMCC: %f '%MCC)
    report_parts.append('\nAUC: %f '%roc_auc)

    # UTF-8 is requested explicitly because the report labels are non-ASCII
    # and the platform default encoding may not be able to represent them.
    with open(metricsFile,'a',encoding='utf-8') as fw:
        fw.write(''.join(report_parts))
205 |
206 |
207 |
208 |
fileHeader =['model_number','dataset','TP','FN','FP','TN','ACC','precision','recall','f1','MCC','AUC']

PCA_num = 20

# The test set is the same for every model, so it is loaded and reshaped once
# instead of on every loop iteration.
test_Feature = np.load("../../data/test_TCRB_PCA{}_feature_array.npy".format(PCA_num))
test_Label = np.load("../../data/test_TCRB_PCA{}_label_array.npy".format(PCA_num))

X_test = test_Feature
Y_test = test_Label
X_test = X_test.reshape([len(X_test),20,PCA_num+1,2])

# Score each of the 50 saved models and write one CSV row per model.
# The context manager closes the file even if loading or scoring fails.
with open("CNN_B_ALL_pca20_result50.csv", "w" , newline='') as csvFile:
    csv_writer = csv.writer(csvFile)
    csv_writer.writerow(fileHeader)

    for model_number in range(1,51):
        # NOTE(review): the training loop in this file saves checkpoints as
        # 'CNN_B_ALL_pca20_plt_{}.h5', but this loads 'CNN_B_ALL_pca20_{}.h5'
        # -- confirm these files are produced by a separate run.
        modelfile = './model/CNN_B_ALL_pca20_{}.h5'.format(model_number)
        model = load_model(modelfile)

        test_CM,accuracy1,precision1,recall1,f11,MCC1,fpr1,tpr1,roc_auc1 = computing_result(X_test,Y_test,model)

        test_row = [model_number,'TEST',
                    test_CM[0][0],test_CM[0][1],
                    test_CM[1][0],test_CM[1][1],
                    accuracy1,precision1,recall1,f11,MCC1,roc_auc1]
        csv_writer.writerow(test_row)

        del model
246 |
247 |
248 |
249 |
--------------------------------------------------------------------------------
/code/train/FULL_A_ALL_onehot.py:
--------------------------------------------------------------------------------
1 |
2 | import tensorflow as tf
3 |
4 | try:
5 | import tensorflow.python.keras as keras
6 | except:
7 | import tensorflow.keras as keras
8 |
9 | from tensorflow.python.keras import layers
10 | from tensorflow.python.keras import backend as K
11 |
12 | from sklearn.model_selection import KFold
13 |
14 | from sklearn import metrics
15 | from sklearn.metrics import accuracy_score,matthews_corrcoef,classification_report,confusion_matrix,precision_score,recall_score
16 | from sklearn.metrics import f1_score,roc_auc_score, auc
17 |
18 | from keras import regularizers
19 |
20 | import os
21 | import scipy.io as sio
22 | from sklearn.model_selection import train_test_split
23 | from sklearn.preprocessing import LabelEncoder
24 | import numpy as np
25 | from tensorflow.python.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
26 | from keras.utils import plot_model
27 | #from tensorflow.keras.callbacks import LearningRateScheduler, ReduceLROnPlateau, ModelCheckpoint
28 |
29 |
30 |
31 | import matplotlib.pyplot as plt
32 | from sklearn.utils import shuffle
33 |
34 | from tensorflow.python.keras.models import load_model
35 |
36 |
37 | import matplotlib.pyplot as plt
38 |
39 | import csv
40 | import pandas as pd
41 |
42 |
43 |
44 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # 只显示error和warining信息 3 只显示error信息
45 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" # 这一行注释掉就是使用cpu,不注释就是使用gpu
46 | from keras.utils import plot_model
47 |
48 |
49 |
def FULL_onehot(modelfile,Dropout1=0,Epochs= 20,Batch_size=64,):
    """Train the fully connected network on one-hot TCRA features and checkpoint the best model.

    The first 29 rows of every feature matrix are used as a ``29 x 20 x 1``
    input that is flattened by the first layer.  The network ends in a 2-unit
    softmax and is trained with binary cross-entropy and the Adam optimizer.
    After every epoch the model is saved to ``modelfile`` if the validation
    loss improved.

    Args:
        modelfile: Path of the ``.h5`` checkpoint file to write.
        Dropout1: Dropout rate (between 0 and 1) applied before the output layer.
        Epochs: Number of training epochs.
        Batch_size: Mini-batch size passed to ``model.fit``.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    train_Feature = np.load("../../data/TCRA_train_feature_array.npy")
    train_Label = np.load("../../data/TCRA_train_label_array.npy")

    test_Feature = np.load("../../data/TCRA_test_feature_array.npy")
    test_Label = np.load("../../data/TCRA_test_label_array.npy")

    X_train, Y_train = shuffle(train_Feature[:, 0:29, :], train_Label)
    X_test, Y_test = shuffle(test_Feature[:, 0:29, :], test_Label)

    X_train = X_train.reshape([len(X_train), 29, 20, 1])
    X_test = X_test.reshape([len(X_test), 29, 20, 1])
    X_test = tf.cast(X_test, tf.float32)

    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(29, 20, 1)),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        # Dropout rate: fraction of the inputs to drop, between 0 and 1.
        tf.keras.layers.Dropout(Dropout1),
        tf.keras.layers.Dense(2, activation='softmax'),
    ])

    model.compile(optimizer="Adam",
                  loss=keras.losses.binary_crossentropy,
                  metrics=['accuracy'])

    # Make sure the checkpoint directory exists before Keras tries to write
    # into it (some Keras versions do not create it themselves).
    checkpoint_dir = os.path.dirname(modelfile)
    if checkpoint_dir:
        os.makedirs(checkpoint_dir, exist_ok=True)

    # NOTE(review): the test split doubles as validation data, so the saved
    # "best" checkpoint is selected on the same data it is later scored on.
    checkpoint = ModelCheckpoint(filepath=modelfile,
                                 monitor='val_loss',
                                 verbose=0,
                                 save_best_only=True)
    cbs = [checkpoint]
    history = model.fit(X_train,
                        Y_train,
                        epochs=Epochs,
                        batch_size=Batch_size,
                        verbose=0,
                        # Fixed: the original passed the undefined names
                        # x_test / y_test, which raises NameError.
                        validation_data=(X_test, Y_test),
                        shuffle=True,
                        callbacks=cbs)
    # The statement that used to follow this return could never run and was removed.
    return history
124 |
125 |
126 |
127 |
# Train 50 independent models and write each run's per-epoch validation
# accuracy as one CSV row.  The context manager closes the file even if a
# training run raises.
with open("FULL_A_ALL_onehot_ACC.csv", "w" , newline='') as csvFile:
    csv_writer = csv.writer(csvFile)

    for model_number in range(1,51):
        modelfile = './model/FULL_A_ALL_onehot_plt_{}.h5'.format(model_number)
        history = FULL_onehot(modelfile,0.3,300,128)

        test_row = history.history['val_accuracy']
        csv_writer.writerow(test_row)
145 |
146 |
147 |
148 |
149 |
def computing_result(Feature_array,Label_array,model):
    """Evaluate ``model`` on a labelled data set and return its metrics.

    Class indices are obtained with ``argmax`` over the last axis of both the
    predictions and the labels; class 1 is treated as the positive class.

    Args:
        Feature_array: Model input passed unchanged to ``model.predict``.
        Label_array: Two-column label array; column 1 is used as the ROC target.
        model: Object exposing ``predict`` that returns two-column scores.

    Returns:
        Tuple ``(confusion, accuracy, precision, recall, f1, MCC, fpr, tpr,
        roc_auc)`` where ``confusion`` is ``[[TP, FN], [FP, TN]]``.
    """
    scores = model.predict(Feature_array)

    predicted_class = np.argmax(scores, axis=-1)
    true_class = np.argmax(Label_array, axis=-1)

    # labels=[0, 1] guarantees a 2x2 matrix even when only one class occurs,
    # so the index-based re-ordering below cannot raise IndexError.
    cm = confusion_matrix(true_class, predicted_class, labels=[0, 1])

    # sklearn orders the matrix [[TN, FP], [FN, TP]]; re-order to [[TP, FN], [FP, TN]].
    new_confusion_matrix1 = [[cm[1,1], cm[1,0]], [cm[0,1], cm[0,0]]]
    accuracy = accuracy_score(true_class, predicted_class)
    precision = precision_score(true_class, predicted_class)
    recall = recall_score(true_class, predicted_class)
    f1 = f1_score(true_class, predicted_class)
    MCC = matthews_corrcoef(true_class, predicted_class)

    # ROC is computed from the class-1 score against the class-1 label column.
    fpr, tpr, thresholds = metrics.roc_curve(Label_array[:,1], scores[:,1])
    roc_auc = auc(fpr, tpr)

    return new_confusion_matrix1,accuracy,precision,recall,f1,MCC,fpr,tpr,roc_auc
177 |
def roc_plot(fpr,tpr,roc_auc):
    """Render a ROC curve with the AUC value in its legend and show it.

    Args:
        fpr: False-positive rates (x values of the curve).
        tpr: True-positive rates (y values of the curve).
        roc_auc: Area under the curve, shown with two decimals in the legend.
    """
    curve_label = 'AUC = %0.2f'% roc_auc
    plt.plot(fpr, tpr, 'b', label=curve_label)
    plt.legend(loc='lower right')

    # Reference diagonal (dashed red).
    diagonal = [0, 1]
    plt.plot(diagonal, diagonal, 'r--')

    x_limits = [-0.01, 1.01]
    y_limits = [0, 1]
    plt.xlim(x_limits)
    plt.ylim(y_limits)

    plt.xlabel('False Positive Rate')   # x axis: fpr
    plt.ylabel('True Positive Rate')    # y axis: tpr
    plt.title('Receiver operating characteristic')
    plt.show()
189 |
190 |
191 |
192 |
def writeMetrics(metricsFile,new_confusion_matrix1,accuracy,precision,recall,f1,MCC,roc_auc,noteInfo=''):
    """Append a plain-text metrics report to ``metricsFile``.

    Args:
        metricsFile: Path of the report file; it is opened in append mode.
        new_confusion_matrix1: Two-row confusion matrix; each row is written
            on its own line using ``str()``.
        accuracy, precision, recall, f1, MCC, roc_auc: Numeric metric values,
            each written with ``%f`` formatting.
        noteInfo: Optional heading written before the report.
    """
    # UTF-8 is requested explicitly because the report labels are non-ASCII
    # and the platform default encoding may not be able to represent them.
    with open(metricsFile,'a',encoding='utf-8') as fw:
        if noteInfo:
            fw.write('\n\n' + noteInfo + '\n')
        # write() takes a single string; the original call passed four
        # arguments and raised TypeError, so the rows are formatted first.
        fw.write('混淆矩阵\n{}\n{}'.format(new_confusion_matrix1[0], new_confusion_matrix1[1]))
        for template, value in (('\n准确率ACC:: %f ', accuracy),
                                ('\n精确率precision: %f ', precision),
                                ('\n召回率recall: %f ', recall),
                                ('\nF1: %f ', f1),
                                ('\nMCC: %f ', MCC),
                                ('\nAUC: %f ', roc_auc)):
            fw.write(template % value)
205 |
206 |
207 |
208 |
fileHeader =['model_number','dataset','TP','FN','FP','TN','ACC','precision','recall','f1','MCC','AUC']

# The test set is the same for every model, so it is loaded and reshaped once
# instead of on every loop iteration.
# NOTE(review): this reads from '../../data_all/' while FULL_onehot() above
# reads from '../../data/' -- confirm which directory is intended.
test_Feature = np.load("../../data_all/TCRA_test_feature_array.npy")
test_Label = np.load("../../data_all/TCRA_test_label_array.npy")

X_test = test_Feature[:,0:29,:]
Y_test = test_Label
X_test = X_test.reshape([len(X_test),29,20,1])

# Score each of the 50 saved models and write one CSV row per model.
# The context manager closes the file even if loading or scoring fails.
with open("FULL_A_ALL_onehot_result.csv", "w" , newline='') as csvFile:
    csv_writer = csv.writer(csvFile)
    csv_writer.writerow(fileHeader)

    for model_number in range(1,51):
        # NOTE(review): the training loop in this file saves checkpoints as
        # 'FULL_A_ALL_onehot_plt_{}.h5', but this loads
        # 'FULL_A_ALL_onehot_{}.h5' -- confirm these files are produced by a
        # separate run.
        modelfile = './model/FULL_A_ALL_onehot_{}.h5'.format(model_number)
        model = load_model(modelfile)

        test_CM,accuracy1,precision1,recall1,f11,MCC1,fpr1,tpr1,roc_auc1 = computing_result(X_test,Y_test,model)

        test_row = [model_number,'TEST',
                    test_CM[0][0],test_CM[0][1],
                    test_CM[1][0],test_CM[1][1],
                    accuracy1,precision1,recall1,f11,MCC1,roc_auc1]
        csv_writer.writerow(test_row)

        del model
244 |
245 |
246 |
247 |
248 |
--------------------------------------------------------------------------------
/code/train/FULL_B_ALL_pca18.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | try:
4 | import tensorflow.python.keras as keras
5 | except:
6 | import tensorflow.keras as keras
7 |
8 | from tensorflow.python.keras import layers
9 | from tensorflow.python.keras import backend as K
10 |
11 | from sklearn.model_selection import KFold
12 |
13 | from sklearn import metrics
14 | from sklearn.metrics import accuracy_score,matthews_corrcoef,classification_report,confusion_matrix,precision_score,recall_score
15 | from sklearn.metrics import f1_score,roc_auc_score, auc
16 |
17 | from keras import regularizers
18 |
19 | import os
20 | import scipy.io as sio
21 | from sklearn.model_selection import train_test_split
22 | from sklearn.preprocessing import LabelEncoder
23 | import numpy as np
24 | from tensorflow.python.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
25 | from keras.utils import plot_model
26 | #from tensorflow.keras.callbacks import LearningRateScheduler, ReduceLROnPlateau, ModelCheckpoint
27 |
28 |
29 |
30 | import matplotlib.pyplot as plt
31 | from sklearn.utils import shuffle
32 |
33 | from tensorflow.python.keras.models import load_model
34 |
35 |
36 | import matplotlib.pyplot as plt
37 |
38 | import csv
39 | import pandas as pd
40 |
41 |
42 |
# Process environment settings for TensorFlow / CUDA.
# TF_CPP_MIN_LOG_LEVEL - author's note: '2' shows only error and warning messages, '3' shows only errors.
#   NOTE(review): TensorFlow's documentation describes '2' as hiding warnings as well - confirm the intended level.
# CUDA_VISIBLE_DEVICES - author's note: comment this entry out to run on the CPU, keep it to run on the GPU.
#   NOTE(review): removing the variable normally exposes all GPUs rather than none - confirm.
os.environ.update({
    'TF_CPP_MIN_LOG_LEVEL': '2',
    "CUDA_VISIBLE_DEVICES": "0",
})
45 |
46 |
47 |
48 |
def FULL_pca18(modelfile,Dropout1=0,Epochs= 20,Batch_size=64,PCA_num = 18):
    """Train a fully connected classifier on the PCA-encoded TCRB arrays.

    The network flattens a ``(20, PCA_num+1, 2)`` input and passes it through
    Dense layers of 256, 512, 256, 128 and 64 ReLU units, a Dropout layer and
    a 2-unit softmax output. It is compiled with the Adam optimizer and
    ``binary_crossentropy`` loss.

    Parameters
    ----------
    modelfile : path handed to ``ModelCheckpoint``; the model is saved there
        whenever ``val_loss`` improves (``save_best_only=True``).
    Dropout1 : rate of the Dropout layer placed before the output layer.
    Epochs : number of training epochs.
    Batch_size : mini-batch size.
    PCA_num : selects the ``*_PCA{PCA_num}_*`` arrays under ``../../data/``
        and fixes the input width to ``PCA_num + 1``.

    Returns
    -------
    The history object returned by ``model.fit``.

    Notes
    -----
    The test arrays serve as validation data, so the checkpoint keeps the
    epoch with the lowest loss on the test set.
    NOTE(review): labels are presumably one-hot with two columns (the
    evaluation code in this file indexes ``Y_TEST[:,1]``) - confirm against
    the feature-extraction script.
    """

    train_Feature = np.load("../../data/train_TCRB_PCA{}_feature_array.npy".format(PCA_num))
    train_Label = np.load("../../data/train_TCRB_PCA{}_label_array.npy".format(PCA_num))

    test_Feature = np.load("../../data/test_TCRB_PCA{}_feature_array.npy".format(PCA_num))
    test_Label = np.load("../../data/test_TCRB_PCA{}_label_array.npy".format(PCA_num))

    # Shuffle once up front; model.fit below runs with shuffle=False.
    X_train,Y_train = shuffle(train_Feature,train_Label)
    X_test,Y_test = shuffle(test_Feature,test_Label)

    X_train= X_train.reshape([len(X_train),20,PCA_num+1,2])
    X_test = X_test.reshape([len(X_test),20,PCA_num+1,2])
    X_test=tf.cast(X_test, tf.float32)

    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(20,PCA_num+1,2)),
        tf.keras.layers.Dense(256,activation='relu'),
        tf.keras.layers.Dense(512,activation='relu'),
        tf.keras.layers.Dense(256,activation='relu'),
        tf.keras.layers.Dense(128,activation='relu'),
        tf.keras.layers.Dense(64,activation='relu'),
        tf.keras.layers.Dropout(Dropout1),# Dropout rate: a float between 0 and 1, the fraction of inputs to drop
        tf.keras.layers.Dense(2, activation='softmax')
    ])

    model.compile(optimizer="Adam",
                  loss=keras.losses.binary_crossentropy,
                  metrics=['accuracy'])
    checkpoint = ModelCheckpoint(filepath=modelfile,
                                 monitor='val_loss',
                                 verbose=0,
                                 save_best_only=True)
    cbs = [checkpoint]
    history = model.fit(X_train,
                        Y_train,
                        epochs= Epochs ,
                        batch_size= Batch_size,
                        verbose=0,
                        # Previously (x_test, y_test): those names are undefined and raised NameError.
                        validation_data=(X_test, Y_test),
                        shuffle=False,
                        callbacks=cbs)
    return history
117 |
118 |
119 |
# Pass 1: train 50 models for 300 epochs each and log the per-epoch validation
# accuracy, one CSV row per model. The context manager closes the log even
# when a training run fails part-way through.
with open("FULL_B_ALL_pca18_test_ACC.csv", "w" , newline='') as csvFile:
    csv_writer= csv.writer(csvFile)

    for model_number in range(50):
        print(model_number)
        modelfile = './model/FULL_B_ALL_pca18_plt_{}.h5'.format(model_number)
        history = FULL_pca18(modelfile,0.3,300,128,18)

        test_row = history.history['val_accuracy']

        csv_writer.writerow(test_row)


# Pass 2: train the 50 models (numbered 1..50) that the evaluation section of
# this file loads, using 50 epochs each.
for model_number in range(1,51):

    modelfile = './model/FULL_B_ALL_pca18_{}.h5'.format(model_number)
    FULL_pca18(modelfile,0.3,50,128,18)
142 |
143 |
144 |
145 |
146 |
def computing_result(Feature_array,Label_array,model):
    """Score ``model`` on a labelled feature set and return summary metrics.

    Parameters
    ----------
    Feature_array : input passed unchanged to ``model.predict``.
    Label_array : reference labels; indexed as ``[:, 1]`` and reduced over the
        last axis, so it must be 2-D with at least two columns.
    model : object exposing ``predict``.

    Returns
    -------
    tuple ``(confusion, accuracy, precision, recall, f1, MCC, fpr, tpr, roc_auc)``
    where ``confusion`` is ``[[cm[1,1], cm[1,0]], [cm[0,1], cm[0,0]]]`` for the
    sklearn confusion matrix ``cm``. The evaluation script in this file writes
    those four cells under the columns TP, FN, FP, TN.
    """
    scores = model.predict(Feature_array)

    # Class ids come from argmin (not argmax) over the last axis, for both the
    # predictions and the reference labels.
    # NOTE(review): presumably column 0 encodes the positive class - confirm.
    predicted = np.argmin(scores, axis=-1)
    actual = np.argmin(Label_array, axis=-1)

    cm = confusion_matrix(actual, predicted)
    # Flip both axes so that the entries for class id 1 come first.
    new_confusion_matrix1 = [
        [cm[1,1], cm[1,0]],
        [cm[0,1], cm[0,0]],
    ]

    accuracy = accuracy_score(actual, predicted)    # accuracy
    precision = precision_score(actual, predicted)  # precision
    recall = recall_score(actual, predicted)        # recall
    f1 = f1_score(actual, predicted)                # F1
    MCC = matthews_corrcoef(actual, predicted)      # Matthews correlation coefficient

    # The ROC curve is built from column 1 of the labels and of the raw scores.
    fpr, tpr, _ = metrics.roc_curve(Label_array[:,1], scores[:,1])
    roc_auc = auc(fpr, tpr)

    return new_confusion_matrix1,accuracy,precision,recall,f1,MCC,fpr,tpr,roc_auc
174 |
def roc_plot(fpr,tpr,roc_auc):
    """Plot a ROC curve with its AUC in the legend and display the figure.

    Parameters
    ----------
    fpr : x values of the curve (false positive rate).
    tpr : y values of the curve (true positive rate).
    roc_auc : number shown in the legend with two decimals.
    """
    auc_label = 'AUC = %0.2f'% roc_auc

    # ROC curve in blue, legend in the lower-right corner.
    plt.plot(fpr, tpr, 'b', label=auc_label)
    plt.legend(loc='lower right')

    # Dashed red diagonal from (0, 0) to (1, 1) as a reference line.
    diagonal = [0,1]
    plt.plot(diagonal, diagonal, 'r--')

    plt.xlim([-0.01,1.01])
    plt.ylim([0,1])

    # The x axis is the false positive rate, the y axis the true positive rate.
    for set_text, text in ((plt.xlabel, 'False Positive Rate'),
                           (plt.ylabel, 'True Positive Rate'),
                           (plt.title, 'Receiver operating characteristic')):
        set_text(text)

    plt.show()
186 |
187 |
188 |
189 |
190 |
191 | # In[ ]:
192 |
193 |
def writeMetrics(metricsFile,new_confusion_matrix1,accuracy,precision,recall,f1,MCC,roc_auc,noteInfo=''):
    """Append a plain-text report of classification metrics to ``metricsFile``.

    Parameters
    ----------
    metricsFile : path of the report file. It is opened in append mode, so
        earlier reports in the same file are kept.
    new_confusion_matrix1 : sequence whose first two entries are the rows of
        the confusion matrix; each row is written on its own line.
        ``computing_result`` in this file builds it as
        ``[[cm[1,1], cm[1,0]], [cm[0,1], cm[0,0]]]``.
    accuracy, precision, recall, f1, MCC, roc_auc : numbers, each formatted
        with ``%f``.
    noteInfo : optional heading written before the metrics; skipped when empty.

    Returns
    -------
    None
    """

    def _plain(row):
        # NumPy scalars expose .item(); unwrap them so a row prints as plain numbers.
        return [v.item() if hasattr(v, 'item') else v for v in row]

    # The labels below are non-ASCII, so the encoding is fixed instead of
    # depending on the platform default.
    with open(metricsFile,'a',encoding='utf-8') as fw:
        if noteInfo:
            fw.write('\n\n' + noteInfo + '\n')
        # write() accepts exactly one string; the previous four-argument call raised TypeError.
        fw.write('混淆矩阵\n%s\n%s' % (_plain(new_confusion_matrix1[0]),
                                     _plain(new_confusion_matrix1[1])))
        fw.write('\n准确率ACC:: %f '%accuracy)
        fw.write('\n精确率precision: %f '%precision)
        fw.write('\n召回率recall: %f '%recall)
        fw.write('\nF1: %f '%f1)
        fw.write('\nMCC: %f '%MCC)
        fw.write('\nAUC: %f '%roc_auc)
206 |
207 |
208 |
209 |
# Column names of the per-model evaluation table.
fileHeader =['model_number','dataset','TP','FN','FP','TN','ACC','precision','recall','f1','MCC','AUC']

PCA_num = 18

# The test set is the same for every model, so it is loaded and reshaped once
# instead of once per loop iteration.
test_Feature = np.load("../../data/test_TCRB_PCA{}_feature_array.npy".format(PCA_num))
test_Label = np.load("../../data/test_TCRB_PCA{}_label_array.npy".format(PCA_num))

X_test = test_Feature
Y_test = test_Label
X_test = X_test.reshape([len(X_test),20,PCA_num+1,2])

# Write the results: one row of test metrics per saved model. The context
# manager closes the CSV file even when a model fails to load or score.
with open("FULL_B_ALL_pca18_result1.csv", "w" , newline='') as csvFile:
    csv_writer = csv.writer(csvFile)
    csv_writer.writerow(fileHeader)

    for model_number in range(1,51):

        modelfile = './model/FULL_B_ALL_pca18_{}.h5'.format(model_number)
        model = load_model(modelfile)

        test_CM,accuracy1,precision1,recall1,f11,MCC1,fpr1,tpr1,roc_auc1 = computing_result(X_test,Y_test,model)

        # Cell order follows the TP, FN, FP, TN columns of fileHeader.
        test_row = [model_number,'TEST',
                    test_CM[0][0],test_CM[0][1],
                    test_CM[1][0],test_CM[1][1],
                    accuracy1,precision1,recall1,f11,MCC1,roc_auc1]

        csv_writer.writerow(test_row)

        # Drop the reference to this model before the next one is loaded.
        del model
245 |
246 |
247 |
248 |
249 |
250 |
251 |
252 |
253 | # In[ ]:
254 |
255 |
256 |
257 |
258 |
259 | # In[ ]:
260 |
261 |
262 |
263 |
264 |
--------------------------------------------------------------------------------
/data/Example_file.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JiangBioLab/DLpTCR/15e4e0a12dcbe180fbb68a3e9e6e014ad08e2796/data/Example_file.xlsx
--------------------------------------------------------------------------------
/data/TRA-VDJdb_TCR cross-reactivity.rar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JiangBioLab/DLpTCR/15e4e0a12dcbe180fbb68a3e9e6e014ad08e2796/data/TRA-VDJdb_TCR cross-reactivity.rar
--------------------------------------------------------------------------------
/data/TRB_VDJdb_TCR cross-reactivity.rar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JiangBioLab/DLpTCR/15e4e0a12dcbe180fbb68a3e9e6e014ad08e2796/data/TRB_VDJdb_TCR cross-reactivity.rar
--------------------------------------------------------------------------------
/model/CNN_A_ALL_onehot.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JiangBioLab/DLpTCR/15e4e0a12dcbe180fbb68a3e9e6e014ad08e2796/model/CNN_A_ALL_onehot.h5
--------------------------------------------------------------------------------
/model/CNN_B_ALL_pca20.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JiangBioLab/DLpTCR/15e4e0a12dcbe180fbb68a3e9e6e014ad08e2796/model/CNN_B_ALL_pca20.h5
--------------------------------------------------------------------------------
/model/FULL_A_ALL_onehot.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JiangBioLab/DLpTCR/15e4e0a12dcbe180fbb68a3e9e6e014ad08e2796/model/FULL_A_ALL_onehot.h5
--------------------------------------------------------------------------------
/model/FULL_B_ALL_pca18.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JiangBioLab/DLpTCR/15e4e0a12dcbe180fbb68a3e9e6e014ad08e2796/model/FULL_B_ALL_pca18.h5
--------------------------------------------------------------------------------
/model/RESNET_A_ALL_pca15.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JiangBioLab/DLpTCR/15e4e0a12dcbe180fbb68a3e9e6e014ad08e2796/model/RESNET_A_ALL_pca15.h5
--------------------------------------------------------------------------------
/model/RESNET_B_ALL_pca10.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JiangBioLab/DLpTCR/15e4e0a12dcbe180fbb68a3e9e6e014ad08e2796/model/RESNET_B_ALL_pca10.h5
--------------------------------------------------------------------------------
/pca/Amino_Acids_PCAVal10_dict.txt:
--------------------------------------------------------------------------------
1 | {'A': [0.24066384742085753, 5.659693734212337, 16.59920307640724, 1.3097949561732563, 3.3587565536251027, 1.9574310258597334, -3.6569027228538658, -0.6627086148590549, 3.544846729374547, -2.2801555125623176], 'R': [-8.175186369473595, -15.780858051446053, -1.1126192621775017, 0.42448908547526387, -8.77714789356098, -8.281651787808485, -3.2240499230410316, -5.524673141417394, -2.221851093943971, -5.846545601385179], 'N': [-15.225239891019065, -0.0479325895799851, -4.124878352673224, -5.722295080397374, -1.9431521714500513, 3.1604092225397413, 2.6256212266098085, 5.522366181432272, -0.12654664349429007, 4.788495578177068], 'D': [-18.353467989973897, -2.9340346325772964, -0.038343090072385566, -2.0130359823062203, 5.910450550347398, 5.475429725958867, 9.028241483968309, -1.4021128977047073, -2.049013496328119, -1.3710997828544935], 'C': [8.222843193810014, 8.194821816409593, -7.137235317851964, -14.957017737863648, 10.512318402717188, -8.495575154730743, -1.8124387957731978, -5.003456069715598, -1.307595004322072, 4.16310516781006], 'Q': [-7.867675634628641, -9.076027179235771, 1.827680124910264, -0.3515709671485801, 1.4703551942537174, -2.4901219979030365, -1.1344037146180044, 0.4465658558487486, 0.4627815179372448, 0.3605135132721673], 'E': [-12.058054880866372, -11.669599326113408, 11.278427001230764, 2.8029109090065396, 8.313613574599778, 3.5150335889209474, 4.80580494878677, -3.3056966753904065, -2.664434406210688, -1.1018335010043232], 'G': [-16.404901187801617, 21.593461636504667, 1.4058083342047984, -5.276889954049388, -8.113453005934891, 9.225328286159437, -5.809173297759754, -4.253510380685129, -3.683105640188344, -1.8804761670436707], 'H': [-0.32930562313103096, -8.348482503523785, -4.903228442934703, -4.586996096585348, 0.9373596019507588, 0.7175895985637355, -4.271917130801593, 9.329191924688152, -3.112185534062506, -2.9973171020077367], 'I': [20.674129755802067, 5.865712858513266, 2.775330928510455, 3.819351167358555, -2.760304951650104, 
-2.009496639472526, 4.0138437132994635, 0.009658360646198363, -3.909491707357819, 1.2515059014515844], 'L': [17.902498302961114, 3.2666672048004273, 11.474198681825108, 6.58229890863799, -1.4027275618621016, 1.8987092316588507, -0.40278169709884354, 0.8865619205386803, 1.6335919603390416, 4.810411896313657], 'K': [-11.69394051138798, -13.413683614641116, 5.794685498068514, 1.722039937781359, -6.102149735719839, -2.0258015986571336, -4.588978122511402, -1.1381017632844523, 1.0363142137598516, 8.91828268700276], 'M': [16.174830528202122, -4.760543431233173, 3.1780613579676316, -2.2992528102211174, 6.945059549890804, 1.9684904609094518, -7.026278908301159, 3.450159780310665, -1.6438627376419144, -2.9283763870146315], 'F': [19.44105221572266, -0.46427598198956077, -3.9881594026874407, 2.5372819350191747, -1.1799767423052847, 4.1320395283463816, 0.6080239521216942, 2.406717507681435, -1.2231610256216745, -1.3304606151033624], 'P': [-16.816701121719976, 11.21975995148225, -14.556461364270172, 19.034938348204143, 7.438403478086218, -3.7808532771141095, -3.7542244823536, 0.07407835444765008, -0.7941886462122676, 0.618071761209133], 'S': [-12.651824913994762, 8.06425248149757, 1.3620238910508002, -3.4268749231593243, -2.2449411844976677, -2.883000629192446, 1.7999401599018938, 3.416153890442091, 6.239889761498302, 0.09914727935834979], 'T': [-4.930504019551637, 6.012165420510978, 0.18549285674215144, -1.4234588213812447, -2.676993077146229, -6.243154450671231, 4.131398732169466, 2.7219849118864485, 7.104243302245841, -4.665510539636515], 'W': [17.59470199016316, -8.797000906059067, -13.15977429572756, -0.09629552052013526, 0.899685224071618, 8.598417612228515, -0.8725223564231552, -6.083014409914341, 8.029039967062795, -0.7690225976346564], 'Y': [8.306170853151393, -3.296447711720766, -12.994878467511455, 0.3899788285279217, -7.434442094319724, 1.9577011691298234, 4.840809392992779, 0.18729583352391124, -2.6771911064073537, 1.4249135090465874], 'V': [15.949911456315235, 
8.712350824188894, 6.134666244988656, 1.5306038174481802, -3.150713711095699, -6.396923914725749, 4.699987541685391, -1.077460568475175, -2.6380804104265887, -1.2636494873944912]}
--------------------------------------------------------------------------------
/pca/Amino_Acids_PCAVal11_dict.txt:
--------------------------------------------------------------------------------
1 | {'A': [0.24066384742086377, 5.659693734212293, 16.59920307640727, 1.309794956173267, 3.358756553625098, 1.9574310258597338, -3.6569027228538444, -0.6627086148591027, 3.5448467293745547, -2.28015551256229, 0.8253850829417329], 'R': [-8.1751863694736, -15.780858051446096, -1.1126192621775215, 0.4244890854752571, -8.777147893560992, -8.281651787808487, -3.2240499230410142, -5.524673141417414, -2.221851093944008, -5.846545601385167, 5.480146085485609], 'N': [-15.225239891019058, -0.04793258957997093, -4.124878352673221, -5.722295080397385, -1.943152171450043, 3.160409222539737, 2.6256212266098036, 5.522366181432286, -0.12654664349429204, 4.788495578177054, 2.0699480796290666], 'D': [-18.35346798997389, -2.9340346325773, -0.03834309007238008, -2.0130359823062145, 5.910450550347405, 5.475429725958882, 9.028241483968312, -1.4021128977046926, -2.0490134963281528, -1.371099782854503, 1.2736482171067727], 'C': [8.222843193809991, 8.194821816409627, -7.137235317851975, -14.957017737863634, 10.512318402717192, -8.495575154730764, -1.8124387957732138, -5.003456069715578, -1.3075950043221314, 4.1631051678100714, 2.46043864404234], 'Q': [-7.86767563462863, -9.07602717923579, 1.8276801249102548, -0.35157096714858105, 1.4703551942537108, -2.490121997903042, -1.134403714618015, 0.44656585584874786, 0.4627815179372493, 0.36051351327214404, -5.924370748393014], 'E': [-12.05805488086636, -11.669599326113449, 11.278427001230769, 2.8029109090065525, 8.313613574599787, 3.5150335889209416, 4.805804948786773, -3.305696675390368, -2.664434406210708, -1.1018335010042941, -0.07949936179825726], 'G': [-16.404901187801585, 21.593461636504692, 1.4058083342048509, -5.276889954049384, -8.113453005934879, 9.225328286159467, -5.8091732977597585, -4.253510380685102, -3.683105640188362, -1.8804761670436765, -1.3964992311786877], 'H': [-0.3293056231310357, -8.348482503523778, -4.903228442934724, -4.586996096585348, 0.937359601950763, 0.7175895985637417, -4.271917130801603, 9.329191924688189, 
-3.112185534062377, -2.9973171020077403, 1.7984550610310255], 'I': [20.67412975580204, 5.865712858513276, 2.7753309285104577, 3.819351167358547, -2.760304951650107, -2.009496639472542, 4.0138437132994556, 0.009658360646254152, -3.909491707357832, 1.2515059014515753, -3.5142479145711567], 'L': [17.902498302961096, 3.2666672048004104, 11.474198681825122, 6.582298908638001, -1.4027275618621051, 1.898709231658844, -0.40278169709884043, 0.8865619205386727, 1.6335919603390237, 4.810411896313676, 7.286483914048553], 'K': [-11.693940511387973, -13.413683614641144, 5.794685498068505, 1.7220399377813527, -6.102149735719829, -2.025801598657137, -4.588978122511426, -1.1381017632844257, 1.0363142137598191, 8.91828268700275, -3.3868899262497054], 'M': [16.174830528202108, -4.760543431233182, 3.1780613579676213, -2.2992528102210916, 6.94505954989081, 1.9684904609094478, -7.026278908301174, 3.4501597803106794, -1.643862737641889, -2.9283763870147026, -4.0515577670699], 'F': [19.44105221572265, -0.46427598198953557, -3.9881594026874487, 2.5372819350191764, -1.1799767423052847, 4.132039528346389, 0.6080239521216921, 2.4067175076814324, -1.2231610256216494, -1.3304606151033629, 3.46149702722976], 'P': [-16.81670112171998, 11.21975995148229, -14.556461364270183, 19.034938348204154, 7.4384034780861885, -3.780853277114133, -3.754224482353598, 0.07407835444765773, -0.7941886462122633, 0.6180717612091233, 0.30671321178721006], 'S': [-12.651824913994753, 8.064252481497574, 1.3620238910508264, -3.426874923159336, -2.24494118449767, -2.8830006291924453, 1.7999401599018998, 3.4161538904420263, 6.239889761498337, 0.09914727935835915, 1.1097056794879663], 'T': [-4.930504019551634, 6.01216542051098, 0.1854928567421631, -1.4234588213812576, -2.6769930771462382, -6.243154450671237, 4.131398732169486, 2.721984911886358, 7.104243302245889, -4.665510539636497, -2.0805773990370064], 'W': [17.594701990163138, -8.797000906059045, -13.159774295727598, -0.09629552052012368, 0.8996852240716227, 
8.598417612228547, -0.8725223564231233, -6.083014409914443, 8.029039967062754, -0.7690225976346016, -1.2250784520168914], 'Y': [8.306170853151368, -3.2964477117207402, -12.994878467511485, 0.3899788285279031, -7.434442094319727, 1.957701169129825, 4.840809392992772, 0.1872958335239829, -2.6771911064073706, 1.4249135090465834, -1.453146271385794], 'V': [15.94991145631523, 8.712350824188897, 6.1346662449886695, 1.5306038174481722, -3.1507137110957095, -6.396923914725761, 4.699987541685386, -1.0774605684751533, -2.6380804104265945, -1.2636494873945037, -2.9605539310896183]}
--------------------------------------------------------------------------------
/pca/Amino_Acids_PCAVal12_dict.txt:
--------------------------------------------------------------------------------
1 | {'A': [0.24066384742087527, 5.659693734212318, 16.599203076407214, 1.3097949561732949, 3.358756553625086, 1.9574310258597358, -3.65690272285385, -0.6627086148590933, 3.544846729374556, -2.2801555125623136, 0.8253850829416989, 1.6956191074545128], 'R': [-8.175186369473607, -15.78085805144605, -1.112619262177484, 0.4244890854751909, -8.777147893560938, -8.281651787808523, -3.224049923041027, -5.524673141417372, -2.2218510939440157, -5.846545601385202, 5.4801460854856225, -2.2087679573025043], 'N': [-15.225239891019076, -0.04793258957998506, -4.124878352673197, -5.722295080397386, -1.9431521714500253, 3.1604092225397484, 2.62562122660982, 5.522366181432258, -0.1265466434942833, 4.788495578177066, 2.0699480796291185, -2.1408222208405476], 'D': [-18.353467989973907, -2.934034632577283, -0.03834309007237307, -2.013035982306166, 5.910450550347373, 5.475429725958903, 9.028241483968296, -1.4021128977046933, -2.049013496328138, -1.3710997828545128, 1.2736482171067887, -1.4736228203699995], 'C': [8.222843193809991, 8.1948218164096, -7.137235317851964, -14.957017737863596, 10.512318402717286, -8.495575154730693, -1.8124387957732024, -5.003456069715585, -1.3075950043221425, 4.163105167810035, 2.4604386440423656, 0.5891767020792053], 'Q': [-7.867675634628633, -9.076027179235759, 1.827680124910273, -0.3515709671485872, 1.4703551942537239, -2.490121997903035, -1.1344037146180177, 0.4465658558487306, 0.4627815179372444, 0.3605135132722124, -5.924370748392944, -3.6538672935748595], 'E': [-12.058054880866345, -11.669599326113396, 11.278427001230753, 2.8029109090066004, 8.313613574599746, 3.5150335889209727, 4.805804948786762, -3.305696675390377, -2.6644344062107095, -1.1018335010043347, -0.07949936179829532, 2.42495072150014], 'G': [-16.404901187801585, 21.593461636504642, 1.4058083342048122, -5.276889954049387, -8.113453005934891, 9.22532828615942, -5.809173297759762, -4.253510380685095, -3.6831056401883635, -1.8804761670436805, -1.3964992311787023, 0.40077345634151856], 'H': 
[-0.32930562313103867, -8.348482503523769, -4.903228442934688, -4.5869960965853425, 0.9373596019507724, 0.7175895985637591, -4.271917130801582, 9.329191924688212, -3.1121855340623954, -2.9973171020077864, 1.7984550610308339, 8.193618852240737], 'I': [20.67412975580202, 5.865712858513258, 2.775330928510439, 3.819351167358534, -2.760304951650117, -2.0094966394725438, 4.0138437132994556, 0.009658360646235419, -3.9094917073578297, 1.2515059014515866, -3.514247914571201, 1.4236211976619502], 'L': [17.90249830296109, 3.2666672048004313, 11.474198681825076, 6.58229890863799, -1.402727561862142, 1.8987092316588297, -0.4027816970988299, 0.8865619205386698, 1.6335919603390299, 4.810411896313634, 7.286483914048628, -1.8597715772739112], 'K': [-11.693940511387964, -13.413683614641098, 5.794685498068522, 1.7220399377813111, -6.1021497357198164, -2.025801598657164, -4.588978122511408, -1.1381017632844586, 1.036314213759821, 8.918282687002767, -3.3868899262497, 2.445482415862785], 'M': [16.17483052820211, -4.760543431233161, 3.1780613579676174, -2.299252810221059, 6.945059549890801, 1.9684904609094727, -7.026278908301152, 3.450159780310687, -1.6438627376418753, -2.9283763870145942, -4.051557767069803, -6.848073056941382], 'F': [19.441052215722625, -0.46427598198954895, -3.9881594026874474, 2.5372819350191773, -1.1799767423053185, 4.132039528346374, 0.6080239521216999, 2.4067175076814515, -1.2231610256216483, -1.3304606151033682, 3.4614970272298025, -2.1535092951188934], 'P': [-16.816701121719962, 11.219759951482219, -14.556461364270172, 19.03493834820416, 7.438403478086129, -3.7808532771141268, -3.754224482353598, 0.07407835444766454, -0.7941886462122649, 0.6180717612091272, 0.3067132117872054, 0.31894176681861236], 'S': [-12.65182491399474, 8.06425248149756, 1.3620238910508093, -3.426874923159352, -2.2449411844976326, -2.883000629192444, 1.7999401599019085, 3.4161538904420214, 6.23988976149835, 0.09914727935837467, 1.1097056794880118, -1.9040513259346885], 'T': 
[-4.930504019551636, 6.01216542051097, 0.18549285674215454, -1.4234588213812838, -2.6769930771461956, -6.243154450671237, 4.1313987321694725, 2.721984911886373, 7.104243302245885, -4.665510539636489, -2.0805773990370566, 0.27965431584441974], 'W': [17.59470199016313, -8.797000906059061, -13.159774295727551, -0.09629552052011171, 0.8996852240715781, 8.598417612228523, -0.8725223564231467, -6.083014409914446, 8.029039967062749, -0.7690225976346472, -1.2250784520169573, 3.34849573649306], 'Y': [8.306170853151345, -3.2964477117207656, -12.994878467511436, 0.3899788285278737, -7.434442094319731, 1.9577011691297999, 4.840809392992773, 0.18729583352395446, -2.6771911064073475, 1.4249135090466076, -1.4531462713857137, -2.3216947034861346], 'V': [15.949911456315212, 8.712350824188887, 6.134666244988643, 1.530603817448142, -3.1507137110956807, -6.39692391472576, 4.69998754168538, -1.0774605684751573, -2.6380804104266002, -1.2636494873945094, -2.9605539310896924, 3.4438459785459794]}
--------------------------------------------------------------------------------
/pca/Amino_Acids_PCAVal13_dict.txt:
--------------------------------------------------------------------------------
1 | {'A': [0.24066384742085167, 5.6596937342123255, 16.599203076407232, 1.3097949561732776, 3.3587565536250956, 1.957431025859738, -3.6569027228538467, -0.6627086148590896, 3.5448467293746, -2.2801555125622386, 0.8253850829417241, 1.6956191074545166, 1.87099479312118], 'R': [-8.175186369473595, -15.780858051446039, -1.112619262177477, 0.42448908547521175, -8.777147893560969, -8.281651787808517, -3.2240499230410253, -5.524673141417417, -2.2218510939439082, -5.846545601385208, 5.480146085485607, -2.2087679573025456, -2.2867795277902623], 'N': [-15.225239891019044, -0.0479325895799883, -4.124878352673219, -5.7222950803973935, -1.9431521714500395, 3.1604092225397484, 2.6256212266098173, 5.522366181432285, -0.12654664349437483, 4.78849557817706, 2.069948079629076, -2.1408222208406174, -4.798142757795578], 'D': [-18.353467989973886, -2.9340346325772857, -0.03834309007238787, -2.013035982306189, 5.910450550347393, 5.475429725958888, 9.02824148396832, -1.402112897704702, -2.049013496328104, -1.3710997828545324, 1.2736482171067753, -1.473622820370023, -3.2876774639408515], 'C': [8.222843193810004, 8.194821816409595, -7.137235317851969, -14.957017737863637, 10.512318402717257, -8.495575154730732, -1.8124387957732175, -5.003456069715592, -1.3075950043221864, 4.163105167810055, 2.4604386440423354, 0.5891767020791365, 1.3155866650084262], 'Q': [-7.867675634628637, -9.076027179235766, 1.8276801249102628, -0.35157096714857966, 1.4703551942537254, -2.4901219979030422, -1.1344037146180184, 0.4465658558487635, 0.4627815179372362, 0.360513513272142, -5.924370748393009, -3.653867293574778, 2.1647956341279095], 'E': [-12.05805488086637, -11.669599326113394, 11.27842700123075, 2.802910909006568, 8.313613574599763, 3.515033588920953, 4.805804948786773, -3.305696675390393, -2.664434406210686, -1.1018335010043554, -0.07949936179826143, 2.424950721500168, 3.4836634582423733], 'G': [-16.404901187801574, 21.59346163650464, 1.4058083342048024, -5.276889954049381, -8.113453005934899, 
9.225328286159465, -5.8091732977597506, -4.253510380685115, -3.68310564018833, -1.8804761670437338, -1.3964992311786955, 0.40077345634155215, 0.45205071350075143], 'H': [-0.3293056231310341, -8.348482503523773, -4.903228442934697, -4.586996096585349, 0.9373596019507738, 0.7175895985637512, -4.271917130801597, 9.329191924688166, -3.1121855340623727, -2.9973171020077727, 1.7984550610309844, 8.19361885224075, 1.1121971322139563], 'I': [20.674129755802028, 5.865712858513264, 2.775330928510448, 3.8193511673585427, -2.7603049516501144, -2.0094966394725438, 4.013843713299461, 0.009658360646233813, -3.9094917073578506, 1.2515059014515373, -3.5142479145711727, 1.4236211976619804, -4.879615956629557], 'L': [17.90249830296108, 3.2666672048004313, 11.474198681825095, 6.582298908637995, -1.402727561862129, 1.8987092316588456, -0.402781697098826, 0.8865619205386736, 1.6335919603389544, 4.810411896313686, 7.286483914048556, -1.8597715772740389, 1.3991502513455545], 'K': [-11.693940511387973, -13.4136836146411, 5.794685498068511, 1.7220399377813331, -6.102149735719842, -2.025801598657148, -4.58897812251142, -1.1381017632844332, 1.0363142137596926, 8.918282687002794, -3.386889926249698, 2.4454824158627524, -1.0078157135086718], 'M': [16.1748305282021, -4.760543431233166, 3.178061357967623, -2.29925281022107, 6.945059549890811, 1.9684904609094724, -7.026278908301164, 3.4501597803106927, -1.643862737641869, -2.9283763870147106, -4.051557767069867, -6.848073056941297, -1.6950582237686727], 'F': [19.441052215722635, -0.4642759819895579, -3.9881594026874487, 2.5372819350191906, -1.1799767423053074, 4.13203952834639, 0.6080239521217059, 2.4067175076814302, -1.2231610256216472, -1.3304606151033975, 3.461497027229794, -2.15350929511892, 0.037676860194824593], 'P': [-16.81670112171996, 11.219759951482224, -14.556461364270172, 19.034938348204182, 7.438403478086173, -3.780853277114127, -3.7542244823536026, 0.07407835444764882, -0.7941886462122774, 0.6180717612091232, 0.3067132117872115, 
0.31894176681860253, -0.32930456086545806], 'S': [-12.651824913994743, 8.064252481497567, 1.362023891050806, -3.426874923159351, -2.244941184497654, -2.8830006291924497, 1.7999401599018952, 3.416153890442055, 6.23988976149833, 0.09914727935843928, 1.1097056794879925, -1.9040513259346985, 1.6891065450743243], 'T': [-4.930504019551628, 6.012165420510973, 0.18549285674215238, -1.4234588213812764, -2.6769930771462027, -6.2431544506712635, 4.131398732169467, 2.7219849118863997, 7.104243302245949, -4.665510539636403, -2.080577399037021, 0.2796543158444729, 0.002984662431238778], 'W': [17.594701990163134, -8.797000906059065, -13.159774295727553, -0.09629552052011728, 0.899685224071597, 8.598417612228534, -0.8725223564231314, -6.083014409914415, 8.029039967062802, -0.7690225976344902, -1.2250784520169193, 3.348495736493046, -2.338673594870632], 'Y': [8.306170853151379, -3.296447711720776, -12.994878467511446, 0.38997882852788435, -7.434442094319746, 1.9577011691298098, 4.840809392992781, 0.1872958335239627, -2.677191106407384, 1.4249135090465264, -1.4531462713857672, -2.3216947034860853, 8.10727449216113], 'V': [15.949911456315215, 8.712350824188889, 6.134666244988648, 1.5306038174481549, -3.150713711095701, -6.396923914725771, 4.69998754168538, -1.0774605684751668, -2.6380804104265647, -1.2636494873945316, -2.960553931089635, 3.4438459785460127, -1.0124134082519871]}
--------------------------------------------------------------------------------
/pca/Amino_Acids_PCAVal14_dict.txt:
--------------------------------------------------------------------------------
1 | {'A': [0.24066384742085373, 5.659693734212369, 16.599203076407264, 1.3097949561732751, 3.358756553625094, 1.9574310258597385, -3.656902722853842, -0.6627086148590239, 3.5448467293745725, -2.280155512562293, 0.8253850829416851, 1.695619107454513, 1.8709947931211763, 1.795620278840329], 'R': [-8.175186369473613, -15.7808580514461, -1.1126192621774584, 0.42448908547520875, -8.777147893560969, -8.28165178780849, -3.224049923041033, -5.5246731414174155, -2.221851093943926, -5.846545601385241, 5.480146085485637, -2.2087679573024404, -2.286779527790273, 0.07716995296012379], 'N': [-15.2252398910191, -0.047932589579981845, -4.12487835267323, -5.722295080397383, -1.94315217145004, 3.1604092225397387, 2.6256212266098187, 5.5223661814322575, -0.12654664349436512, 4.788495578177069, 2.0699480796291323, -2.14082222084058, -4.798142757795596, -3.7267342942888195], 'D': [-18.35346798997392, -2.9340346325772946, -0.03834309007237283, -2.013035982306174, 5.910450550347373, 5.475429725958871, 9.028241483968317, -1.4021128977047237, -2.0490134963281212, -1.3710997828545177, 1.2736482171068062, -1.4736228203699977, -3.2876774639408475, 5.744850091534423], 'C': [8.222843193810034, 8.1948218164096, -7.137235317851995, -14.957017737863623, 10.512318402717234, -8.495575154730725, -1.812438795773209, -5.003456069715619, -1.3075950043220739, 4.163105167810046, 2.460438644042353, 0.5891767020791989, 1.3155866650084225, -0.40010247542695304], 'Q': [-7.867675634628655, -9.076027179235794, 1.827680124910283, -0.3515709671485827, 1.4703551942537196, -2.49012199790304, -1.134403714618014, 0.4465658558487318, 0.4627815179372313, 0.36051351327217657, -5.924370748392929, -3.6538672935748897, 2.164795634127901, -4.825405123361882], 'E': [-12.058054880866383, -11.66959932611342, 11.2784270012308, 2.8029109090065734, 8.313613574599753, 3.5150335889209474, 4.805804948786779, -3.3056966753904167, -2.664434406210675, -1.1018335010043305, -0.07949936179831031, 2.4249507215001698, 3.483663458242367, 
-4.023734016649611], 'G': [-16.404901187801638, 21.59346163650472, 1.4058083342047794, -5.276889954049394, -8.11345300593487, 9.225328286159442, -5.809173297759765, -4.253510380685157, -3.6831056401883115, -1.8804761670436942, -1.3964992311787054, 0.4007734563415437, 0.4520507135007506, -1.1799294926100519], 'H': [-0.329305623131032, -8.348482503523806, -4.9032284429347, -4.586996096585353, 0.9373596019507784, 0.7175895985637534, -4.271917130801586, 9.329191924688175, -3.112185534062495, -2.997317102007727, 1.79845506103082, 8.19361885224077, 1.1121971322139925, 0.9434686249645998], 'I': [20.67412975580208, 5.86571285851329, 2.775330928510443, 3.8193511673585463, -2.7603049516501206, -2.0094966394725535, 4.013843713299453, 0.009658360646187605, -3.909491707357833, 1.2515059014515864, -3.514247914571193, 1.4236211976619264, -4.879615956629567, -0.7123666242815694], 'L': [17.902498302961146, 3.2666672048004566, 11.47419868182511, 6.582298908637994, -1.4027275618621298, 1.898709231658842, -0.4027816970988319, 0.8865619205386771, 1.6335919603390074, 4.810411896313641, 7.286483914048631, -1.859771577273909, 1.399150251345558, 0.3290423275700568], 'K': [-11.693940511387998, -13.413683614641148, 5.794685498068544, 1.7220399377813287, -6.102149735719822, -2.025801598657149, -4.588978122511422, -1.138101763284438, 1.0363142137598316, 8.918282687002803, -3.3868899262497227, 2.445482415862719, -1.0078157135086594, 2.7691591179397945], 'M': [16.17483052820215, -4.760543431233183, 3.178061357967638, -2.2992528102210845, 6.945059549890812, 1.9684904609094578, -7.026278908301163, 3.450159780310654, -1.6438627376419366, -2.9283763870146604, -4.051557767069772, -6.848073056941374, -1.695058223768678, 3.1870785038217533], 'F': [19.441052215722696, -0.46427598198955816, -3.988159402687446, 2.5372819350191764, -1.1799767423053014, 4.132039528346377, 0.6080239521216918, 2.4067175076814147, -1.2231610256216898, -1.3304606151033744, 3.4614970272298073, -2.153509295118879, 
0.03767686019481772, -3.8638472974164286], 'P': [-16.816701121720005, 11.219759951482231, -14.556461364270206, 19.03493834820418, 7.438403478086164, -3.780853277114127, -3.7542244823536066, 0.07407835444764418, -0.7941886462122645, 0.6180717612091251, 0.30671321178720395, 0.31894176681860914, -0.3293045608654588, 0.20116264246860302], 'S': [-12.651824913994776, 8.0642524814976, 1.362023891050795, -3.4268749231593514, -2.244941184497649, -2.8830006291924413, 1.7999401599018998, 3.4161538904421205, 6.239889761498295, 0.09914727935837615, 1.1097056794880127, -1.9040513259347025, 1.6891065450743188, 1.4532692426420577], 'T': [-4.93050401955164, 6.012165420510998, 0.1854928567421427, -1.4234588213812744, -2.6769930771462125, -6.2431544506712235, 4.131398732169474, 2.7219849118864947, 7.104243302245856, -4.665510539636477, -2.080577399037046, 0.27965431584443756, 0.0029846624312665064, -1.003472547874272], 'W': [17.594701990163184, -8.797000906059122, -13.159774295727566, -0.09629552052011793, 0.8996852240716094, 8.598417612228532, -0.8725223564231257, -6.083014409914307, 8.029039967062838, -0.7690225976346089, -1.2250784520169766, 3.3484957364930485, -2.33867359487063, -0.3865847812522643], 'Y': [8.306170853151405, -3.296447711720795, -12.994878467511464, 0.38997882852789356, -7.434442094319737, 1.9577011691298083, 4.840809392992772, 0.1872958335239113, -2.6771911064073755, 1.424913509046582, -1.4531462713857208, -2.32169470348614, 8.107274492161126, 2.9688959980365754], 'V': [15.949911456315258, 8.71235082418893, 6.1346662449886455, 1.5306038174481542, -3.1507137110957055, -6.396923914725768, 4.699987541685375, -1.0774605684751906, -2.638080410426571, -1.2636494873944888, -2.960553931089711, 3.443845978545984, -1.0124134082519811, 0.6524598723835228]}
--------------------------------------------------------------------------------
/pca/Amino_Acids_PCAVal15_dict.txt:
--------------------------------------------------------------------------------
1 | {'A': [0.2406638474208436, 5.6596937342123805, 16.599203076407225, 1.3097949561732867, 3.3587565536250983, 1.9574310258597356, -3.656902722853848, -0.6627086148590956, 3.5448467293745374, -2.2801555125623305, 0.8253850829417291, 1.6956191074544977, 1.870994793121202, 1.795620278840332, 4.6247949395093], 'R': [-8.175186369473591, -15.780858051446069, -1.1126192621774433, 0.4244890854752155, -8.777147893561017, -8.281651787808457, -3.22404992304101, -5.524673141417389, -2.221851093944031, -5.846545601385181, 5.480146085485582, -2.2087679573025967, -2.286779527790283, 0.07716995296012441, 1.5221918095074969], 'N': [-15.22523989101905, -0.047932589580011786, -4.124878352673219, -5.722295080397402, -1.9431521714500035, 3.160409222539739, 2.6256212266097934, 5.522366181432279, -0.126546643494247, 4.788495578177095, 2.0699480796290746, -2.140822220840575, -4.798142757795591, -3.726734294288805, 4.803544704453361], 'D': [-18.353467989973904, -2.9340346325773043, -0.038343090072368045, -2.013035982306186, 5.910450550347414, 5.475429725958846, 9.028241483968314, -1.402112897704668, -2.0490134963281363, -1.3710997828544877, 1.273648217106748, -1.4736228203700428, -3.287677463940844, 5.7448500915344205, -2.7722501383097793], 'C': [8.222843193810014, 8.194821816409586, -7.137235317851989, -14.957017737863586, 10.512318402717238, -8.495575154730782, -1.812438795773199, -5.003456069715615, -1.307595004322109, 4.163105167810047, 2.4604386440423576, 0.5891767020791554, 1.3155866650084254, -0.4001024754269525, -0.7680497343338267], 'Q': [-7.867675634628632, -9.076027179235778, 1.8276801249102885, -0.35157096714857855, 1.4703551942537083, -2.490121997903048, -1.1344037146180257, 0.44656585584874275, 0.4627815179372565, 0.360513513272185, -5.9243707483929935, -3.6538672935747494, 2.164795634127882, -4.8254051233618815, -0.7097513895003064], 'E': [-12.05805488086637, -11.669599326113381, 11.278427001230792, 2.8029109090065862, 8.313613574599774, 3.515033588920916, 
4.805804948786778, -3.305696675390373, -2.664434406210728, -1.1018335010043254, -0.0794993617982487, 2.424950721500144, 3.4836634582423636, -4.02373401664962, 0.4413802931369726], 'G': [-16.40490118780161, 21.593461636504696, 1.4058083342047476, -5.27688995404942, -8.11345300593483, 9.225328286159476, -5.809173297759736, -4.2535103806851104, -3.6831056401883817, -1.8804761670436694, -1.3964992311786892, 0.4007734563415389, 0.4520507135007416, -1.179929492610053, -1.804248936124283], 'H': [-0.32930562313102696, -8.348482503523798, -4.903228442934677, -4.58699609658535, 0.937359601950791, 0.7175895985637434, -4.271917130801603, 9.329191924688194, -3.112185534062454, -2.99731710200775, 1.7984550610310055, 8.193618852240702, 1.1121971322140005, 0.9434686249646106, -0.6188479041152979], 'I': [20.674129755802056, 5.865712858513278, 2.775330928510438, 3.8193511673585436, -2.7603049516501343, -2.0094966394725384, 4.01384371329945, 0.009658360646242377, -3.9094917073578124, 1.2515059014516094, -3.514247914571159, 1.423621197662014, -4.87961595662956, -0.7123666242815702, 1.502156953444114], 'L': [17.902498302961096, 3.266667204800472, 11.474198681825097, 6.582298908637997, -1.4027275618621327, 1.898709231658861, -0.4027816970988306, 0.886561920538661, 1.6335919603390496, 4.810411896313641, 7.286483914048567, -1.8597715772740504, 1.39915025134555, 0.3290423275700605, -1.7356258143437289], 'K': [-11.693940511387988, -13.413683614641107, 5.794685498068545, 1.7220399377813236, -6.102149735719851, -2.0258015986571216, -4.588978122511417, -1.1381017632844765, 1.0363142137598704, 8.918282687002751, -3.386889926249675, 2.4454824158628363, -1.007815713508642, 2.7691591179397927, -3.1503194155842076], 'M': [16.174830528202126, -4.760543431233164, 3.178061357967642, -2.2992528102210716, 6.945059549890831, 1.9684904609094198, -7.026278908301167, 3.4501597803106816, -1.6438627376418855, -2.9283763870145996, -4.051557767069925, -6.848073056941293, -1.695058223768696, 3.1870785038217475, 
0.2659485302105409], 'F': [19.441052215722667, -0.46427598198956893, -3.9881594026874394, 2.537281935019175, -1.1799767423052854, 4.1320395283463816, 0.6080239521216919, 2.406717507681451, -1.2231610256216707, -1.3304606151033482, 3.4614970272297603, -2.15350929511895, 0.037676860194812326, -3.8638472974164095, -4.491291473786314], 'P': [-16.816701121719976, 11.219759951482187, -14.5564613642702, 19.034938348204214, 7.438403478086114, -3.7808532771141317, -3.7542244823535964, 0.07407835444764632, -0.7941886462122619, 0.6180717612091308, 0.3067132117872086, 0.3189417668186057, -0.3293045608654606, 0.20116264246860607, 0.5042517432074558], 'S': [-12.651824913994762, 8.064252481497585, 1.3620238910507865, -3.4268749231593447, -2.2449411844976517, -2.8830006291924444, 1.7999401599018854, 3.41615389044204, 6.239889761498348, 0.09914727935834038, 1.1097056794879794, -1.904051325934693, 1.6891065450743166, 1.4532692426420473, 1.469176268041172], 'T': [-4.930504019551631, 6.012165420510983, 0.1854928567421343, -1.4234588213812585, -2.6769930771462467, -6.243154450671222, 4.131398732169452, 2.721984911886404, 7.104243302245867, -4.665510539636527, -2.0805773990370344, 0.27965431584447364, 0.0029846624312543923, -1.0034725478742716, -4.097677300296455], 'W': [17.594701990163166, -8.79700090605912, -13.159774295727523, -0.09629552052012318, 0.8996852240716435, 8.598417612228545, -0.8725223564231127, -6.083014409914428, 8.02903996706276, -0.7690225976346869, -1.2250784520168851, 3.348495736493085, -2.338673594870624, -0.38658478125226164, 1.3529747627705848], 'Y': [8.306170853151402, -3.2964477117208153, -12.994878467511445, 0.38997882852786847, -7.4344420943197305, 1.957701169129835, 4.840809392992768, 0.18729583352395768, -2.6771911064073572, 1.4249135090466025, -1.4531462713857788, -2.3216947034861217, 8.107274492161132, 2.9688959980365532, 2.527096141332837], 'V': [15.949911456315244, 8.712350824188924, 6.134666244988636, 1.530603817448164, -3.1507137110957357, 
-6.396923914725752, 4.699987541685383, -1.0774605684751455, -2.638080410426597, -1.2636494873944892, -2.9605539310896254, 3.4438459785460225, -1.0124134082519771, 0.6524598723835268, 1.1345459607803443]}
--------------------------------------------------------------------------------
/pca/Amino_Acids_PCAVal16_dict.txt:
--------------------------------------------------------------------------------
1 | {'A': [0.24066384742087102, 5.65969373421236, 16.599203076407214, 1.3097949561732931, 3.358756553625084, 1.9574310258597571, -3.6569027228538635, -0.6627086148591114, 3.544846729374479, -2.280155512562403, 0.82538508294174, 1.6956191074545237, 1.8709947931212176, 1.7956202788401412, 4.624794939509524, 4.865165649995154], 'R': [-8.1751863694736, -15.780858051446065, -1.1126192621774564, 0.42448908547520287, -8.777147893560988, -8.281651787808505, -3.2240499230410142, -5.524673141417414, -2.221851093944187, -5.846545601385111, 5.480146085485594, -2.208767957302579, -2.2867795277902725, 0.0771699529600886, 1.522191809507504, -0.2795020883848622], 'N': [-15.22523989101904, -0.04793258957999449, -4.124878352673211, -5.722295080397375, -1.943152171450024, 3.1604092225397404, 2.6256212266098076, 5.522366181432292, -0.12654664349412237, 4.788495578177072, 2.06994807962904, -2.1408222208406027, -4.798142757795651, -3.7267342942889576, 4.803544704453234, 0.49460722924826367], 'D': [-18.3534679899739, -2.9340346325773, -0.038343090072362765, -2.0130359823061816, 5.910450550347402, 5.475429725958868, 9.02824148396834, -1.4021128977046646, -2.0490134963281785, -1.3710997828544305, 1.2736482171067718, -1.4736228203700505, -3.2876774639407587, 5.74485009153458, -2.7722501383095097, 0.33919586417751246], 'C': [8.222843193810009, 8.194821816409581, -7.137235317851969, -14.95701773786359, 10.512318402717257, -8.49557515473075, -1.8124387957732289, -5.003456069715573, -1.307595004322032, 4.16310516781011, 2.460438644042358, 0.5891767020791588, 1.31558666500841, -0.4001024754269377, -0.7680497343338213, 0.5231489915952623], 'Q': [-7.86767563462864, -9.076027179235773, 1.8276801249102872, -0.3515709671485749, 1.4703551942537156, -2.4901219979030476, -1.1344037146180292, 0.44656585584873754, 0.46278151793726535, 0.3605135132721362, -5.924370748392997, -3.6538672935747614, 2.1647956341278394, -4.825405123361882, -0.7097513895004537, -1.6704381746659103], 'E': [-12.05805488086636, 
-11.66959932611338, 11.27842700123078, 2.8029109090065876, 8.313613574599769, 3.5150335889209448, 4.8058049487867915, -3.305696675390373, -2.6644344062107552, -1.1018335010042342, -0.07949936179825721, 2.4249507215001684, 3.4836634582423054, -4.023734016649684, 0.4413802931367269, -1.0866215419805514], 'G': [-16.40490118780157, 21.59346163650468, 1.4058083342047654, -5.276889954049405, -8.113453005934876, 9.225328286159483, -5.809173297759725, -4.253510380685115, -3.683105640188435, -1.8804761670435688, -1.396499231178691, 0.40077345634154043, 0.45205071350073417, -1.1799294926099881, -1.804248936124372, -1.498199969364454], 'H': [-0.32930562313103917, -8.348482503523796, -4.903228442934681, -4.586996096585343, 0.9373596019507745, 0.7175895985637661, -4.271917130801619, 9.329191924688194, -3.1121855340625055, -2.9973171020076905, 1.7984550610309922, 8.193618852240709, 1.1121971322139803, 0.9434686249646081, -0.6188479041152881, -1.797179190330819], 'I': [20.674129755802024, 5.865712858513279, 2.77533092851043, 3.8193511673585374, -2.760304951650122, -2.0094966394725553, 4.013843713299472, 0.009658360646252475, -3.9094917073577804, 1.2515059014516912, -3.514247914571189, 1.4236211976619801, -4.879615956629555, -0.7123666242815717, 1.5021569534440715, -0.815125752036846], 'L': [17.902498302961067, 3.266667204800459, 11.474198681825076, 6.582298908637983, -1.4027275618621406, 1.8987092316588465, -0.4027816970988318, 0.8865619205386887, 1.6335919603391567, 4.810411896313626, 7.28648391404857, -1.8597715772740406, 1.3991502513455407, 0.32904232757007684, -1.7356258143438157, -5.2512190365980835], 'K': [-11.693940511387973, -13.4136836146411, 5.794685498068537, 1.7220399377813245, -6.102149735719845, -2.025801598657147, -4.588978122511424, -1.138101763284427, 1.0363142137600792, 8.918282687002737, -3.386889926249688, 2.445482415862795, -1.0078157135086234, 2.769159117939951, -3.150319415584065, 2.5321809625838028], 'M': [16.1748305282021, -4.760543431233167, 
3.17806135796763, -2.299252810221058, 6.945059549890812, 1.9684904609094764, -7.026278908301175, 3.4501597803106523, -1.6438627376419503, -2.928376387014608, -4.05155776706991, -6.848073056941311, -1.6950582237686103, 3.1870785038217564, 0.26594853021058606, -1.2648816668568925], 'F': [19.44105221572264, -0.464275981989561, -3.988159402687446, 2.537281935019179, -1.1799767423053042, 4.132039528346388, 0.6080239521217078, 2.4067175076814364, -1.223161025621695, -1.330460615103349, 3.4614970272297767, -2.1535092951189494, 0.037676860194751555, -3.8638472974161937, -4.491291473786294, 6.610581963399969], 'P': [-16.816701121719948, 11.219759951482212, -14.556461364270184, 19.034938348204168, 7.438403478086134, -3.7808532771141286, -3.75422448235361, 0.07407835444764826, -0.7941886462122442, 0.6180717612091464, 0.30671321178720506, 0.3189417668186018, -0.3293045608654604, 0.2011626424685875, 0.5042517432074641, 0.12364250949280928], 'S': [-12.65182491399473, 8.064252481497578, 1.362023891050795, -3.4268749231593563, -2.244941184497635, -2.8830006291924533, 1.7999401599018832, 3.416153890442027, 6.239889761498357, 0.09914727935817165, 1.1097056794879918, -1.9040513259346943, 1.689106545074352, 1.4532692426419622, 1.4691762680411948, -0.26213034855516365], 'T': [-4.930504019551623, 6.012165420510983, 0.18549285674214322, -1.4234588213812809, -2.6769930771462134, -6.243154450671257, 4.131398732169458, 2.721984911886361, 7.104243302245752, -4.665510539636705, -2.080577399037018, 0.27965431584448874, 0.0029846624312389242, -1.0034725478740898, -4.097677300296537, -0.8053382420499478], 'W': [17.594701990163117, -8.797000906059113, -13.159774295727528, -0.09629552052013261, 0.8996852240716142, 8.59841761222856, -0.8725223564231183, -6.083014409914444, 8.029039967062719, -0.7690225976348397, -1.2250784520168927, 3.348495736493084, -2.3386735948706407, -0.3865847812522959, 1.3529747627705346, -1.4446261527764146], 'Y': [8.30617085315137, -3.2964477117207984, -12.994878467511441, 
0.38997882852787547, -7.4344420943197225, 1.9577011691298023, 4.840809392992785, 0.18729583352396598, -2.677191106407308, 1.424913509046673, -1.4531462713857608, -2.3216947034860853, 8.107274492161196, 2.96889599803636, 2.527096141332955, 0.489836783119848], 'V': [15.949911456315228, 8.71235082418892, 6.134666244988628, 1.5306038174481476, -3.1507137110957033, -6.396923914725789, 4.699987541685389, -1.077460568475139, -2.6380804104266264, -1.2636494873944244, -2.9605539310896325, 3.443845978546013, -1.0124134082519836, 0.6524598723834925, 1.1345459607803672, 0.1969022099873305]}
--------------------------------------------------------------------------------
/pca/Amino_Acids_PCAVal17_dict.txt:
--------------------------------------------------------------------------------
1 | {'A': [0.24066384742087102, 5.65969373421236, 16.599203076407214, 1.3097949561732931, 3.358756553625084, 1.9574310258597571, -3.6569027228538635, -0.6627086148591114, 3.544846729374479, -2.280155512562403, 0.82538508294174, 1.6956191074545237, 1.8709947931212176, 1.7956202788401412, 4.624794939509524, 4.865165649995154, 2.548715154595098], 'R': [-8.1751863694736, -15.780858051446065, -1.1126192621774564, 0.42448908547520287, -8.777147893560988, -8.281651787808505, -3.2240499230410142, -5.524673141417414, -2.221851093944187, -5.846545601385111, 5.480146085485594, -2.208767957302579, -2.2867795277902725, 0.0771699529600886, 1.522191809507504, -0.2795020883848622, -0.02562957115012471], 'N': [-15.22523989101904, -0.04793258957999449, -4.124878352673211, -5.722295080397375, -1.943152171450024, 3.1604092225397404, 2.6256212266098076, 5.522366181432292, -0.12654664349412237, 4.788495578177072, 2.06994807962904, -2.1408222208406027, -4.798142757795651, -3.7267342942889576, 4.803544704453234, 0.49460722924826367, -1.1341017129963245], 'D': [-18.3534679899739, -2.9340346325773, -0.038343090072362765, -2.0130359823061816, 5.910450550347402, 5.475429725958868, 9.02824148396834, -1.4021128977046646, -2.0490134963281785, -1.3710997828544305, 1.2736482171067718, -1.4736228203700505, -3.2876774639407587, 5.74485009153458, -2.7722501383095097, 0.33919586417751246, 3.125373362633658], 'C': [8.222843193810009, 8.194821816409581, -7.137235317851969, -14.95701773786359, 10.512318402717257, -8.49557515473075, -1.8124387957732289, -5.003456069715573, -1.307595004322032, 4.16310516781011, 2.460438644042358, 0.5891767020791588, 1.31558666500841, -0.4001024754269377, -0.7680497343338213, 0.5231489915952623, 0.037518544049185566], 'Q': [-7.86767563462864, -9.076027179235773, 1.8276801249102872, -0.3515709671485749, 1.4703551942537156, -2.4901219979030476, -1.1344037146180292, 0.44656585584873754, 0.46278151793726535, 0.3605135132721362, -5.924370748392997, -3.6538672935747614, 
2.1647956341278394, -4.825405123361882, -0.7097513895004537, -1.6704381746659103, 6.587230743975947], 'E': [-12.05805488086636, -11.66959932611338, 11.27842700123078, 2.8029109090065876, 8.313613574599769, 3.5150335889209448, 4.8058049487867915, -3.305696675390373, -2.6644344062107552, -1.1018335010042342, -0.07949936179825721, 2.4249507215001684, 3.4836634582423054, -4.023734016649684, 0.4413802931367269, -1.0866215419805514, -4.779480704682543], 'G': [-16.40490118780157, 21.59346163650468, 1.4058083342047654, -5.276889954049405, -8.113453005934876, 9.225328286159483, -5.809173297759725, -4.253510380685115, -3.683105640188435, -1.8804761670435688, -1.396499231178691, 0.40077345634154043, 0.45205071350073417, -1.1799294926099881, -1.804248936124372, -1.498199969364454, -0.40736970304839426], 'H': [-0.32930562313103917, -8.348482503523796, -4.903228442934681, -4.586996096585343, 0.9373596019507745, 0.7175895985637661, -4.271917130801619, 9.329191924688194, -3.1121855340625055, -2.9973171020076905, 1.7984550610309922, 8.193618852240709, 1.1121971322139803, 0.9434686249646081, -0.6188479041152881, -1.797179190330819, 1.4340728018666498], 'I': [20.674129755802024, 5.865712858513279, 2.77533092851043, 3.8193511673585374, -2.760304951650122, -2.0094966394725553, 4.013843713299472, 0.009658360646252475, -3.9094917073577804, 1.2515059014516912, -3.514247914571189, 1.4236211976619801, -4.879615956629555, -0.7123666242815717, 1.5021569534440715, -0.815125752036846, 0.06547110269182106], 'L': [17.902498302961067, 3.266667204800459, 11.474198681825076, 6.582298908637983, -1.4027275618621406, 1.8987092316588465, -0.4027816970988318, 0.8865619205386887, 1.6335919603391567, 4.810411896313626, 7.28648391404857, -1.8597715772740406, 1.3991502513455407, 0.32904232757007684, -1.7356258143438157, -5.2512190365980835, 1.321607057153766], 'K': [-11.693940511387973, -13.4136836146411, 5.794685498068537, 1.7220399377813245, -6.102149735719845, -2.025801598657147, -4.588978122511424, 
-1.138101763284427, 1.0363142137600792, 8.918282687002737, -3.386889926249688, 2.445482415862795, -1.0078157135086234, 2.769159117939951, -3.150319415584065, 2.5321809625838028, -1.642009378971283], 'M': [16.1748305282021, -4.760543431233167, 3.17806135796763, -2.299252810221058, 6.945059549890812, 1.9684904609094764, -7.026278908301175, 3.4501597803106523, -1.6438627376419503, -2.928376387014608, -4.05155776706991, -6.848073056941311, -1.6950582237686103, 3.1870785038217564, 0.26594853021058606, -1.2648816668568925, -3.373258813794027], 'F': [19.44105221572264, -0.464275981989561, -3.988159402687446, 2.537281935019179, -1.1799767423053042, 4.132039528346388, 0.6080239521217078, 2.4067175076814364, -1.223161025621695, -1.330460615103349, 3.4614970272297767, -2.1535092951189494, 0.037676860194751555, -3.8638472974161937, -4.491291473786294, 6.610581963399969, 0.024105331442943582], 'P': [-16.816701121719948, 11.219759951482212, -14.556461364270184, 19.034938348204168, 7.438403478086134, -3.7808532771141286, -3.75422448235361, 0.07407835444764826, -0.7941886462122442, 0.6180717612091464, 0.30671321178720506, 0.3189417668186018, -0.3293045608654604, 0.2011626424685875, 0.5042517432074641, 0.12364250949280928, -0.23822718054431216], 'S': [-12.65182491399473, 8.064252481497578, 1.362023891050795, -3.4268749231593563, -2.244941184497635, -2.8830006291924533, 1.7999401599018832, 3.416153890442027, 6.239889761498357, 0.09914727935817165, 1.1097056794879918, -1.9040513259346943, 1.689106545074352, 1.4532692426419622, 1.4691762680411948, -0.26213034855516365, -1.1725897936370973], 'T': [-4.930504019551623, 6.012165420510983, 0.18549285674214322, -1.4234588213812809, -2.6769930771462134, -6.243154450671257, 4.131398732169458, 2.721984911886361, 7.104243302245752, -4.665510539636705, -2.080577399037018, 0.27965431584448874, 0.0029846624312389242, -1.0034725478740898, -4.097677300296537, -0.8053382420499478, -2.42420045996958], 'W': [17.594701990163117, -8.797000906059113, 
-13.159774295727528, -0.09629552052013261, 0.8996852240716142, 8.59841761222856, -0.8725223564231183, -6.083014409914444, 8.029039967062719, -0.7690225976348397, -1.2250784520168927, 3.348495736493084, -2.3386735948706407, -0.3865847812522959, 1.3529747627705346, -1.4446261527764146, 0.29485086573821634], 'Y': [8.30617085315137, -3.2964477117207984, -12.994878467511441, 0.38997882852787547, -7.4344420943197225, 1.9577011691298023, 4.840809392992785, 0.18729583352396598, -2.677191106407308, 1.424913509046673, -1.4531462713857608, -2.3216947034860853, 8.107274492161196, 2.96889599803636, 2.527096141332955, 0.489836783119848, -0.6154893575826234], 'V': [15.949911456315228, 8.71235082418892, 6.134666244988628, 1.5306038174481476, -3.1507137110957033, -6.396923914725789, 4.699987541685389, -1.077460568475139, -2.6380804104266264, -1.2636494873944244, -2.9605539310896325, 3.443845978546013, -1.0124134082519836, 0.6524598723834925, 1.1345459607803672, 0.1969022099873305, 0.37341171222902036]}
--------------------------------------------------------------------------------
/pca/Amino_Acids_PCAVal18_dict.txt:
--------------------------------------------------------------------------------
1 | {'A': [0.24066384742087102, 5.65969373421236, 16.599203076407214, 1.3097949561732931, 3.358756553625084, 1.9574310258597571, -3.6569027228538635, -0.6627086148591114, 3.544846729374479, -2.280155512562403, 0.82538508294174, 1.6956191074545237, 1.8709947931212176, 1.7956202788401412, 4.624794939509524, 4.865165649995154, 2.548715154595098, -2.448033256198294], 'R': [-8.1751863694736, -15.780858051446065, -1.1126192621774564, 0.42448908547520287, -8.777147893560988, -8.281651787808505, -3.2240499230410142, -5.524673141417414, -2.221851093944187, -5.846545601385111, 5.480146085485594, -2.208767957302579, -2.2867795277902725, 0.0771699529600886, 1.522191809507504, -0.2795020883848622, -0.02562957115012471, 0.03193169175826159], 'N': [-15.22523989101904, -0.04793258957999449, -4.124878352673211, -5.722295080397375, -1.943152171450024, 3.1604092225397404, 2.6256212266098076, 5.522366181432292, -0.12654664349412237, 4.788495578177072, 2.06994807962904, -2.1408222208406027, -4.798142757795651, -3.7267342942889576, 4.803544704453234, 0.49460722924826367, -1.1341017129963245, -1.922628739772712], 'D': [-18.3534679899739, -2.9340346325773, -0.038343090072362765, -2.0130359823061816, 5.910450550347402, 5.475429725958868, 9.02824148396834, -1.4021128977046646, -2.0490134963281785, -1.3710997828544305, 1.2736482171067718, -1.4736228203700505, -3.2876774639407587, 5.74485009153458, -2.7722501383095097, 0.33919586417751246, 3.125373362633658, -0.09577349311650397], 'C': [8.222843193810009, 8.194821816409581, -7.137235317851969, -14.95701773786359, 10.512318402717257, -8.49557515473075, -1.8124387957732289, -5.003456069715573, -1.307595004322032, 4.16310516781011, 2.460438644042358, 0.5891767020791588, 1.31558666500841, -0.4001024754269377, -0.7680497343338213, 0.5231489915952623, 0.037518544049185566, -0.6456117482416496], 'Q': [-7.86767563462864, -9.076027179235773, 1.8276801249102872, -0.3515709671485749, 1.4703551942537156, -2.4901219979030476, -1.1344037146180292, 
0.44656585584873754, 0.46278151793726535, 0.3605135132721362, -5.924370748392997, -3.6538672935747614, 2.1647956341278394, -4.825405123361882, -0.7097513895004537, -1.6704381746659103, 6.587230743975947, 0.5760003218306804], 'E': [-12.05805488086636, -11.66959932611338, 11.27842700123078, 2.8029109090065876, 8.313613574599769, 3.5150335889209448, 4.8058049487867915, -3.305696675390373, -2.6644344062107552, -1.1018335010042342, -0.07949936179825721, 2.4249507215001684, 3.4836634582423054, -4.023734016649684, 0.4413802931367269, -1.0866215419805514, -4.779480704682543, 0.7827811900829793], 'G': [-16.40490118780157, 21.59346163650468, 1.4058083342047654, -5.276889954049405, -8.113453005934876, 9.225328286159483, -5.809173297759725, -4.253510380685115, -3.683105640188435, -1.8804761670435688, -1.396499231178691, 0.40077345634154043, 0.45205071350073417, -1.1799294926099881, -1.804248936124372, -1.498199969364454, -0.40736970304839426, 0.031239850280428937], 'H': [-0.32930562313103917, -8.348482503523796, -4.903228442934681, -4.586996096585343, 0.9373596019507745, 0.7175895985637661, -4.271917130801619, 9.329191924688194, -3.1121855340625055, -2.9973171020076905, 1.7984550610309922, 8.193618852240709, 1.1121971322139803, 0.9434686249646081, -0.6188479041152881, -1.797179190330819, 1.4340728018666498, 0.28628648289050035], 'I': [20.674129755802024, 5.865712858513279, 2.77533092851043, 3.8193511673585374, -2.760304951650122, -2.0094966394725553, 4.013843713299472, 0.009658360646252475, -3.9094917073577804, 1.2515059014516912, -3.514247914571189, 1.4236211976619801, -4.879615956629555, -0.7123666242815717, 1.5021569534440715, -0.815125752036846, 0.06547110269182106, -0.21483559437810043], 'L': [17.902498302961067, 3.266667204800459, 11.474198681825076, 6.582298908637983, -1.4027275618621406, 1.8987092316588465, -0.4027816970988318, 0.8865619205386887, 1.6335919603391567, 4.810411896313626, 7.28648391404857, -1.8597715772740406, 1.3991502513455407, 0.32904232757007684, 
-1.7356258143438157, -5.2512190365980835, 1.321607057153766, -0.9030437337605471], 'K': [-11.693940511387973, -13.4136836146411, 5.794685498068537, 1.7220399377813245, -6.102149735719845, -2.025801598657147, -4.588978122511424, -1.138101763284427, 1.0363142137600792, 8.918282687002737, -3.386889926249688, 2.445482415862795, -1.0078157135086234, 2.769159117939951, -3.150319415584065, 2.5321809625838028, -1.642009378971283, -0.006485944536307038], 'M': [16.1748305282021, -4.760543431233167, 3.17806135796763, -2.299252810221058, 6.945059549890812, 1.9684904609094764, -7.026278908301175, 3.4501597803106523, -1.6438627376419503, -2.928376387014608, -4.05155776706991, -6.848073056941311, -1.6950582237686103, 3.1870785038217564, 0.26594853021058606, -1.2648816668568925, -3.373258813794027, 0.10823590046473205], 'F': [19.44105221572264, -0.464275981989561, -3.988159402687446, 2.537281935019179, -1.1799767423053042, 4.132039528346388, 0.6080239521217078, 2.4067175076814364, -1.223161025621695, -1.330460615103349, 3.4614970272297767, -2.1535092951189494, 0.037676860194751555, -3.8638472974161937, -4.491291473786294, 6.610581963399969, 0.024105331442943582, 1.5906264303244464], 'P': [-16.816701121719948, 11.219759951482212, -14.556461364270184, 19.034938348204168, 7.438403478086134, -3.7808532771141286, -3.75422448235361, 0.07407835444764826, -0.7941886462122442, 0.6180717612091464, 0.30671321178720506, 0.3189417668186018, -0.3293045608654604, 0.2011626424685875, 0.5042517432074641, 0.12364250949280928, -0.23822718054431216, -0.13549398540410673], 'S': [-12.65182491399473, 8.064252481497578, 1.362023891050795, -3.4268749231593563, -2.244941184497635, -2.8830006291924533, 1.7999401599018832, 3.416153890442027, 6.239889761498357, 0.09914727935817165, 1.1097056794879918, -1.9040513259346943, 1.689106545074352, 1.4532692426419622, 1.4691762680411948, -0.26213034855516365, -1.1725897936370973, 6.2331641395780055], 'T': [-4.930504019551623, 6.012165420510983, 0.18549285674214322, 
-1.4234588213812809, -2.6769930771462134, -6.243154450671257, 4.131398732169458, 2.721984911886361, 7.104243302245752, -4.665510539636705, -2.080577399037018, 0.27965431584448874, 0.0029846624312389242, -1.0034725478740898, -4.097677300296537, -0.8053382420499478, -2.42420045996958, -4.37007859119585], 'W': [17.594701990163117, -8.797000906059113, -13.159774295727528, -0.09629552052013261, 0.8996852240716142, 8.59841761222856, -0.8725223564231183, -6.083014409914444, 8.029039967062719, -0.7690225976348397, -1.2250784520168927, 3.348495736493084, -2.3386735948706407, -0.3865847812522959, 1.3529747627705346, -1.4446261527764146, 0.29485086573821634, 0.7439520492228338], 'Y': [8.30617085315137, -3.2964477117207984, -12.994878467511441, 0.38997882852787547, -7.4344420943197225, 1.9577011691298023, 4.840809392992785, 0.18729583352396598, -2.677191106407308, 1.424913509046673, -1.4531462713857608, -2.3216947034860853, 8.107274492161196, 2.96889599803636, 2.527096141332955, 0.489836783119848, -0.6154893575826234, -1.8059266808244314], 'V': [15.949911456315228, 8.71235082418892, 6.134666244988628, 1.5306038174481476, -3.1507137110957033, -6.396923914725789, 4.699987541685389, -1.077460568475139, -2.6380804104266264, -1.2636494873944244, -2.9605539310896325, 3.443845978546013, -1.0124134082519836, 0.6524598723834925, 1.1345459607803672, 0.1969022099873305, 0.37341171222902036, 2.163693710995632]}
--------------------------------------------------------------------------------
/pca/Amino_Acids_PCAVal19_dict.txt:
--------------------------------------------------------------------------------
1 | {'A': [0.24066384742087102, 5.65969373421236, 16.599203076407214, 1.3097949561732931, 3.358756553625084, 1.9574310258597571, -3.6569027228538635, -0.6627086148591114, 3.544846729374479, -2.280155512562403, 0.82538508294174, 1.6956191074545237, 1.8709947931212176, 1.7956202788401412, 4.624794939509524, 4.865165649995154, 2.548715154595098, -2.448033256198294, -0.7211200028377479], 'R': [-8.1751863694736, -15.780858051446065, -1.1126192621774564, 0.42448908547520287, -8.777147893560988, -8.281651787808505, -3.2240499230410142, -5.524673141417414, -2.221851093944187, -5.846545601385111, 5.480146085485594, -2.208767957302579, -2.2867795277902725, 0.0771699529600886, 1.522191809507504, -0.2795020883848622, -0.02562957115012471, 0.03193169175826159, -0.37703069385595206], 'N': [-15.22523989101904, -0.04793258957999449, -4.124878352673211, -5.722295080397375, -1.943152171450024, 3.1604092225397404, 2.6256212266098076, 5.522366181432292, -0.12654664349412237, 4.788495578177072, 2.06994807962904, -2.1408222208406027, -4.798142757795651, -3.7267342942889576, 4.803544704453234, 0.49460722924826367, -1.1341017129963245, -1.922628739772712, 1.9634293264484624], 'D': [-18.3534679899739, -2.9340346325773, -0.038343090072362765, -2.0130359823061816, 5.910450550347402, 5.475429725958868, 9.02824148396834, -1.4021128977046646, -2.0490134963281785, -1.3710997828544305, 1.2736482171067718, -1.4736228203700505, -3.2876774639407587, 5.74485009153458, -2.7722501383095097, 0.33919586417751246, 3.125373362633658, -0.09577349311650397, 0.09627477426957656], 'C': [8.222843193810009, 8.194821816409581, -7.137235317851969, -14.95701773786359, 10.512318402717257, -8.49557515473075, -1.8124387957732289, -5.003456069715573, -1.307595004322032, 4.16310516781011, 2.460438644042358, 0.5891767020791588, 1.31558666500841, -0.4001024754269377, -0.7680497343338213, 0.5231489915952623, 0.037518544049185566, -0.6456117482416496, -0.5726639341136356], 'Q': [-7.86767563462864, -9.076027179235773, 
1.8276801249102872, -0.3515709671485749, 1.4703551942537156, -2.4901219979030476, -1.1344037146180292, 0.44656585584873754, 0.46278151793726535, 0.3605135132721362, -5.924370748392997, -3.6538672935747614, 2.1647956341278394, -4.825405123361882, -0.7097513895004537, -1.6704381746659103, 6.587230743975947, 0.5760003218306804, 0.07661839025464307], 'E': [-12.05805488086636, -11.66959932611338, 11.27842700123078, 2.8029109090065876, 8.313613574599769, 3.5150335889209448, 4.8058049487867915, -3.305696675390373, -2.6644344062107552, -1.1018335010042342, -0.07949936179825721, 2.4249507215001684, 3.4836634582423054, -4.023734016649684, 0.4413802931367269, -1.0866215419805514, -4.779480704682543, 0.7827811900829793, -0.4209094265947445], 'G': [-16.40490118780157, 21.59346163650468, 1.4058083342047654, -5.276889954049405, -8.113453005934876, 9.225328286159483, -5.809173297759725, -4.253510380685115, -3.683105640188435, -1.8804761670435688, -1.396499231178691, 0.40077345634154043, 0.45205071350073417, -1.1799294926099881, -1.804248936124372, -1.498199969364454, -0.40736970304839426, 0.031239850280428937, 0.019641789249728085], 'H': [-0.32930562313103917, -8.348482503523796, -4.903228442934681, -4.586996096585343, 0.9373596019507745, 0.7175895985637661, -4.271917130801619, 9.329191924688194, -3.1121855340625055, -2.9973171020076905, 1.7984550610309922, 8.193618852240709, 1.1121971322139803, 0.9434686249646081, -0.6188479041152881, -1.797179190330819, 1.4340728018666498, 0.28628648289050035, -0.4876074357245765], 'I': [20.674129755802024, 5.865712858513279, 2.77533092851043, 3.8193511673585374, -2.760304951650122, -2.0094966394725553, 4.013843713299472, 0.009658360646252475, -3.9094917073577804, 1.2515059014516912, -3.514247914571189, 1.4236211976619801, -4.879615956629555, -0.7123666242815717, 1.5021569534440715, -0.815125752036846, 0.06547110269182106, -0.21483559437810043, -5.44972976354805], 'L': [17.902498302961067, 3.266667204800459, 11.474198681825076, 
6.582298908637983, -1.4027275618621406, 1.8987092316588465, -0.4027816970988318, 0.8865619205386887, 1.6335919603391567, 4.810411896313626, 7.28648391404857, -1.8597715772740406, 1.3991502513455407, 0.32904232757007684, -1.7356258143438157, -5.2512190365980835, 1.321607057153766, -0.9030437337605471, 0.6246779566311597], 'K': [-11.693940511387973, -13.4136836146411, 5.794685498068537, 1.7220399377813245, -6.102149735719845, -2.025801598657147, -4.588978122511424, -1.138101763284427, 1.0363142137600792, 8.918282687002737, -3.386889926249688, 2.445482415862795, -1.0078157135086234, 2.769159117939951, -3.150319415584065, 2.5321809625838028, -1.642009378971283, -0.006485944536307038, 0.14886805410637788], 'M': [16.1748305282021, -4.760543431233167, 3.17806135796763, -2.299252810221058, 6.945059549890812, 1.9684904609094764, -7.026278908301175, 3.4501597803106523, -1.6438627376419503, -2.928376387014608, -4.05155776706991, -6.848073056941311, -1.6950582237686103, 3.1870785038217564, 0.26594853021058606, -1.2648816668568925, -3.373258813794027, 0.10823590046473205, 0.94215903292332], 'F': [19.44105221572264, -0.464275981989561, -3.988159402687446, 2.537281935019179, -1.1799767423053042, 4.132039528346388, 0.6080239521217078, 2.4067175076814364, -1.223161025621695, -1.330460615103349, 3.4614970272297767, -2.1535092951189494, 0.037676860194751555, -3.8638472974161937, -4.491291473786294, 6.610581963399969, 0.024105331442943582, 1.5906264303244464, 0.07304864170997138], 'P': [-16.816701121719948, 11.219759951482212, -14.556461364270184, 19.034938348204168, 7.438403478086134, -3.7808532771141286, -3.75422448235361, 0.07407835444764826, -0.7941886462122442, 0.6180717612091464, 0.30671321178720506, 0.3189417668186018, -0.3293045608654604, 0.2011626424685875, 0.5042517432074641, 0.12364250949280928, -0.23822718054431216, -0.13549398540410673, 0.3348737074322725], 'S': [-12.65182491399473, 8.064252481497578, 1.362023891050795, -3.4268749231593563, -2.244941184497635, 
-2.8830006291924533, 1.7999401599018832, 3.416153890442027, 6.239889761498357, 0.09914727935817165, 1.1097056794879918, -1.9040513259346943, 1.689106545074352, 1.4532692426419622, 1.4691762680411948, -0.26213034855516365, -1.1725897936370973, 6.2331641395780055, -1.9478319315839436], 'T': [-4.930504019551623, 6.012165420510983, 0.18549285674214322, -1.4234588213812809, -2.6769930771462134, -6.243154450671257, 4.131398732169458, 2.721984911886361, 7.104243302245752, -4.665510539636705, -2.080577399037018, 0.27965431584448874, 0.0029846624312389242, -1.0034725478740898, -4.097677300296537, -0.8053382420499478, -2.42420045996958, -4.37007859119585, -0.11824304499429703], 'W': [17.594701990163117, -8.797000906059113, -13.159774295727528, -0.09629552052013261, 0.8996852240716142, 8.59841761222856, -0.8725223564231183, -6.083014409914444, 8.029039967062719, -0.7690225976348397, -1.2250784520168927, 3.348495736493084, -2.3386735948706407, -0.3865847812522959, 1.3529747627705346, -1.4446261527764146, 0.29485086573821634, 0.7439520492228338, 0.6150806290716546], 'Y': [8.30617085315137, -3.2964477117207984, -12.994878467511441, 0.38997882852787547, -7.4344420943197225, 1.9577011691298023, 4.840809392992785, 0.18729583352396598, -2.677191106407308, 1.424913509046673, -1.4531462713857608, -2.3216947034860853, 8.107274492161196, 2.96889599803636, 2.527096141332955, 0.489836783119848, -0.6154893575826234, -1.8059266808244314, -0.3077371931220417], 'V': [15.949911456315228, 8.71235082418892, 6.134666244988628, 1.5306038174481476, -3.1507137110957033, -6.396923914725789, 4.699987541685389, -1.077460568475139, -2.6380804104266264, -1.2636494873944244, -2.9605539310896325, 3.443845978546013, -1.0124134082519836, 0.6524598723834925, 1.1345459607803672, 0.1969022099873305, 0.37341171222902036, 2.163693710995632, 5.508201124277812]}
--------------------------------------------------------------------------------
/pca/Amino_Acids_PCAVal20_dict.txt:
--------------------------------------------------------------------------------
1 | {'A': [0.24066384742087102, 5.65969373421236, 16.599203076407214, 1.3097949561732931, 3.358756553625084, 1.9574310258597571, -3.6569027228538635, -0.6627086148591114, 3.544846729374479, -2.280155512562403, 0.82538508294174, 1.6956191074545237, 1.8709947931212176, 1.7956202788401412, 4.624794939509524, 4.865165649995154, 2.548715154595098, -2.448033256198294, -0.7211200028377479, 1.222393873744638e-15], 'R': [-8.1751863694736, -15.780858051446065, -1.1126192621774564, 0.42448908547520287, -8.777147893560988, -8.281651787808505, -3.2240499230410142, -5.524673141417414, -2.221851093944187, -5.846545601385111, 5.480146085485594, -2.208767957302579, -2.2867795277902725, 0.0771699529600886, 1.522191809507504, -0.2795020883848622, -0.02562957115012471, 0.03193169175826159, -0.37703069385595206, 1.2223938737446385e-15], 'N': [-15.22523989101904, -0.04793258957999449, -4.124878352673211, -5.722295080397375, -1.943152171450024, 3.1604092225397404, 2.6256212266098076, 5.522366181432292, -0.12654664349412237, 4.788495578177072, 2.06994807962904, -2.1408222208406027, -4.798142757795651, -3.7267342942889576, 4.803544704453234, 0.49460722924826367, -1.1341017129963245, -1.922628739772712, 1.9634293264484624, 1.2223938737446383e-15], 'D': [-18.3534679899739, -2.9340346325773, -0.038343090072362765, -2.0130359823061816, 5.910450550347402, 5.475429725958868, 9.02824148396834, -1.4021128977046646, -2.0490134963281785, -1.3710997828544305, 1.2736482171067718, -1.4736228203700505, -3.2876774639407587, 5.74485009153458, -2.7722501383095097, 0.33919586417751246, 3.125373362633658, -0.09577349311650397, 0.09627477426957656, 1.2223938737446387e-15], 'C': [8.222843193810009, 8.194821816409581, -7.137235317851969, -14.95701773786359, 10.512318402717257, -8.49557515473075, -1.8124387957732289, -5.003456069715573, -1.307595004322032, 4.16310516781011, 2.460438644042358, 0.5891767020791588, 1.31558666500841, -0.4001024754269377, -0.7680497343338213, 0.5231489915952623, 0.037518544049185566, 
-0.6456117482416496, -0.5726639341136356, 1.222393873744639e-15], 'Q': [-7.86767563462864, -9.076027179235773, 1.8276801249102872, -0.3515709671485749, 1.4703551942537156, -2.4901219979030476, -1.1344037146180292, 0.44656585584873754, 0.46278151793726535, 0.3605135132721362, -5.924370748392997, -3.6538672935747614, 2.1647956341278394, -4.825405123361882, -0.7097513895004537, -1.6704381746659103, 6.587230743975947, 0.5760003218306804, 0.07661839025464307, 1.222393873744639e-15], 'E': [-12.05805488086636, -11.66959932611338, 11.27842700123078, 2.8029109090065876, 8.313613574599769, 3.5150335889209448, 4.8058049487867915, -3.305696675390373, -2.6644344062107552, -1.1018335010042342, -0.07949936179825721, 2.4249507215001684, 3.4836634582423054, -4.023734016649684, 0.4413802931367269, -1.0866215419805514, -4.779480704682543, 0.7827811900829793, -0.4209094265947445, 1.2223938737446387e-15], 'G': [-16.40490118780157, 21.59346163650468, 1.4058083342047654, -5.276889954049405, -8.113453005934876, 9.225328286159483, -5.809173297759725, -4.253510380685115, -3.683105640188435, -1.8804761670435688, -1.396499231178691, 0.40077345634154043, 0.45205071350073417, -1.1799294926099881, -1.804248936124372, -1.498199969364454, -0.40736970304839426, 0.031239850280428937, 0.019641789249728085, 1.2223938737446387e-15], 'H': [-0.32930562313103917, -8.348482503523796, -4.903228442934681, -4.586996096585343, 0.9373596019507745, 0.7175895985637661, -4.271917130801619, 9.329191924688194, -3.1121855340625055, -2.9973171020076905, 1.7984550610309922, 8.193618852240709, 1.1121971322139803, 0.9434686249646081, -0.6188479041152881, -1.797179190330819, 1.4340728018666498, 0.28628648289050035, -0.4876074357245765, 1.222393873744639e-15], 'I': [20.674129755802024, 5.865712858513279, 2.77533092851043, 3.8193511673585374, -2.760304951650122, -2.0094966394725553, 4.013843713299472, 0.009658360646252475, -3.9094917073577804, 1.2515059014516912, -3.514247914571189, 1.4236211976619801, -4.879615956629555, 
-0.7123666242815717, 1.5021569534440715, -0.815125752036846, 0.06547110269182106, -0.21483559437810043, -5.44972976354805, 1.2223938737446387e-15], 'L': [17.902498302961067, 3.266667204800459, 11.474198681825076, 6.582298908637983, -1.4027275618621406, 1.8987092316588465, -0.4027816970988318, 0.8865619205386887, 1.6335919603391567, 4.810411896313626, 7.28648391404857, -1.8597715772740406, 1.3991502513455407, 0.32904232757007684, -1.7356258143438157, -5.2512190365980835, 1.321607057153766, -0.9030437337605471, 0.6246779566311597, 1.2223938737446385e-15], 'K': [-11.693940511387973, -13.4136836146411, 5.794685498068537, 1.7220399377813245, -6.102149735719845, -2.025801598657147, -4.588978122511424, -1.138101763284427, 1.0363142137600792, 8.918282687002737, -3.386889926249688, 2.445482415862795, -1.0078157135086234, 2.769159117939951, -3.150319415584065, 2.5321809625838028, -1.642009378971283, -0.006485944536307038, 0.14886805410637788, 1.2223938737446387e-15], 'M': [16.1748305282021, -4.760543431233167, 3.17806135796763, -2.299252810221058, 6.945059549890812, 1.9684904609094764, -7.026278908301175, 3.4501597803106523, -1.6438627376419503, -2.928376387014608, -4.05155776706991, -6.848073056941311, -1.6950582237686103, 3.1870785038217564, 0.26594853021058606, -1.2648816668568925, -3.373258813794027, 0.10823590046473205, 0.94215903292332, 1.2223938737446381e-15], 'F': [19.44105221572264, -0.464275981989561, -3.988159402687446, 2.537281935019179, -1.1799767423053042, 4.132039528346388, 0.6080239521217078, 2.4067175076814364, -1.223161025621695, -1.330460615103349, 3.4614970272297767, -2.1535092951189494, 0.037676860194751555, -3.8638472974161937, -4.491291473786294, 6.610581963399969, 0.024105331442943582, 1.5906264303244464, 0.07304864170997138, 1.2223938737446393e-15], 'P': [-16.816701121719948, 11.219759951482212, -14.556461364270184, 19.034938348204168, 7.438403478086134, -3.7808532771141286, -3.75422448235361, 0.07407835444764826, -0.7941886462122442, 
0.6180717612091464, 0.30671321178720506, 0.3189417668186018, -0.3293045608654604, 0.2011626424685875, 0.5042517432074641, 0.12364250949280928, -0.23822718054431216, -0.13549398540410673, 0.3348737074322725, 1.2223938737446387e-15], 'S': [-12.65182491399473, 8.064252481497578, 1.362023891050795, -3.4268749231593563, -2.244941184497635, -2.8830006291924533, 1.7999401599018832, 3.416153890442027, 6.239889761498357, 0.09914727935817165, 1.1097056794879918, -1.9040513259346943, 1.689106545074352, 1.4532692426419622, 1.4691762680411948, -0.26213034855516365, -1.1725897936370973, 6.2331641395780055, -1.9478319315839436, 1.2223938737446383e-15], 'T': [-4.930504019551623, 6.012165420510983, 0.18549285674214322, -1.4234588213812809, -2.6769930771462134, -6.243154450671257, 4.131398732169458, 2.721984911886361, 7.104243302245752, -4.665510539636705, -2.080577399037018, 0.27965431584448874, 0.0029846624312389242, -1.0034725478740898, -4.097677300296537, -0.8053382420499478, -2.42420045996958, -4.37007859119585, -0.11824304499429703, 1.2223938737446395e-15], 'W': [17.594701990163117, -8.797000906059113, -13.159774295727528, -0.09629552052013261, 0.8996852240716142, 8.59841761222856, -0.8725223564231183, -6.083014409914444, 8.029039967062719, -0.7690225976348397, -1.2250784520168927, 3.348495736493084, -2.3386735948706407, -0.3865847812522959, 1.3529747627705346, -1.4446261527764146, 0.29485086573821634, 0.7439520492228338, 0.6150806290716546, 1.22239387374464e-15], 'Y': [8.30617085315137, -3.2964477117207984, -12.994878467511441, 0.38997882852787547, -7.4344420943197225, 1.9577011691298023, 4.840809392992785, 0.18729583352396598, -2.677191106407308, 1.424913509046673, -1.4531462713857608, -2.3216947034860853, 8.107274492161196, 2.96889599803636, 2.527096141332955, 0.489836783119848, -0.6154893575826234, -1.8059266808244314, -0.3077371931220417, 1.2223938737446377e-15], 'V': [15.949911456315228, 8.71235082418892, 6.134666244988628, 1.5306038174481476, -3.1507137110957033, 
-6.396923914725789, 4.699987541685389, -1.077460568475139, -2.6380804104266264, -1.2636494873944244, -2.9605539310896325, 3.443845978546013, -1.0124134082519836, 0.6524598723834925, 1.1345459607803672, 0.1969022099873305, 0.37341171222902036, 2.163693710995632, 5.508201124277812, 1.22239387374464e-15]}
--------------------------------------------------------------------------------
/pca/Amino_Acids_PCAVal8_dict.txt:
--------------------------------------------------------------------------------
1 | {'A': [0.24066377211842255, 5.65969785469955, 16.599211023637935, 1.3097478890746035, 3.3583367401074122, 1.957955865701426, -3.65491097533641, -0.666017456564982], 'R': [-8.175186375087945, -15.780857765273936, -1.112618739491064, 0.4244865225245392, -8.777169559864916, -8.281629519219157, -3.224040150226029, -5.524637553182541], 'N': [-15.225239806788673, -0.047937171809988, -4.124887155516504, -5.722243479833081, -1.9426943193172432, 3.159841452547806, 2.623486827434296, 5.526138023352474], 'D': [-18.35346798977429, -2.934034646175445, -0.0383431205237743, -2.013035735766272, 5.9104551283716145, 5.4754221922701305, 9.028230387762994, -1.402192259721239], 'C': [8.222843168110185, 8.194823220036232, -7.137232613352413, -14.957033851071078, 10.512174806669622, -8.49539978461511, -1.8117615264544948, -5.004845470465567], 'Q': [-7.867675607068859, -9.076028702072207, 1.8276771680284658, -0.3515531224557015, 1.4705172386403582, -2.4903294053603227, -1.135252274606228, 0.44825471332069167], 'E': [-12.058054892084751, -11.6695987089715, 11.278428198084132, 2.802903634567096, 8.313549004366951, 3.5151153978508924, 4.806178718372665, -3.306698979860564], 'G': [-16.404901173636215, 21.59346086417884, 1.4058068489974482, -5.276881189193427, -8.11337629439224, 9.225239041757481, -5.809497703512754, -4.253208689317336], 'H': [-0.3293056517119849, -8.34848091455253, -4.903225342523245, -4.587015180544636, 0.937183293632763, 0.7178196146057543, -4.270906333720624, 9.326952781682246], 'I': [20.674129574414085, 5.865722652134503, 2.775349641104194, 3.819243261920098, -2.761251180424646, -2.0083369118312966, 4.018084215679328, 0.0023650167931423102], 'L': [17.902498363007382, 3.266663925038249, 11.474192365515652, 6.58233620350471, -1.4023949124425914, 1.8982948209603336, -0.4043699652228913, 0.8894484340630131], 'K': [-11.693940525712424, -13.413682815019335, 5.794687061746053, 1.7220303269471502, -6.1022396571032615, -2.0256838499262497, -4.588493903375134, 
-1.1391018215478717], 'M': [16.174830577215076, -4.760546110481472, 3.178056194046207, -2.2992222953051566, 6.945333682964412, 1.9681461176208572, -7.027638678181382, 3.4527498195881328], 'F': [19.441052198058152, -0.4642750223567195, -3.9881575624586234, 2.537271249058383, -1.1800706071571776, 4.1321564055001065, 0.6084563489490445, 2.406005781220233], 'P': [-16.8167011128003, 11.219759476566047, -14.556462261901626, 19.034943317706137, 7.438446812965774, -3.7809068778505726, -3.754407118244148, 0.07430441272831402], 'S': [-12.651824993738675, 8.064256785132683, 1.362032109434074, -3.426922181924931, -2.2453544608230556, -2.882497585320281, 1.801711753882256, 3.413737043779691], 'T': [-4.930503994290925, 6.012164019237194, 0.18549012756198976, -1.4234421530465515, -2.6768410903747277, -6.243352245136205, 4.130551946027289, 2.724103133188852], 'W': [17.59470202332362, -8.797002720107587, -13.159777796219515, -0.09627470586741355, 0.8998732282165504, 8.598182380651389, -0.8734809502302504, -6.080858524839967], 'Y': [8.306170810017367, -3.2964453310791018, -12.994873849991532, 0.38995106194144535, -7.434694724477769, 1.9580243004197428, 4.842132604065035, 0.18461251073708954], 'V': [15.949911636430766, 8.712341110876528, 6.134647703822157, 1.530710427764089, -3.149783129557841, -6.398061410626728, 4.695926776937451, -1.0711109149538218]}
--------------------------------------------------------------------------------
/pca/Amino_Acids_PCAVal9_dict.txt:
--------------------------------------------------------------------------------
1 | {'A': [0.2406638474208526, 5.659693734212348, 16.599203076407203, 1.3097949561732714, 3.3587565536250947, 1.957431025859745, -3.6569027228538595, -0.6627086148591452, 3.5448467293745174], 'R': [-8.175186369473574, -15.78085805144607, -1.112619262177465, 0.4244890854752146, -8.777147893560974, -8.281651787808485, -3.2240499230410324, -5.524673141417409, -2.221851093944081], 'N': [-15.225239891019033, -0.04793258958000641, -4.12487835267321, -5.7222950803973704, -1.9431521714500275, 3.160409222539737, 2.6256212266098107, 5.522366181432312, -0.12654664349419992], 'D': [-18.353467989973872, -2.934034632577304, -0.03834309007237499, -2.013035982306187, 5.910450550347399, 5.475429725958869, 9.02824148396833, -1.4021128977046737, -2.0490134963281474], 'C': [8.222843193809988, 8.194821816409586, -7.137235317851974, -14.95701773786358, 10.512318402717254, -8.495575154730723, -1.812438795773208, -5.00345606971555, -1.3075950043221458], 'Q': [-7.867675634628625, -9.076027179235778, 1.827680124910278, -0.35157096714857755, 1.4703551942537154, -2.4901219979030342, -1.1344037146180144, 0.44656585584872505, 0.462781517937249], 'E': [-12.05805488086634, -11.669599326113387, 11.278427001230776, 2.802910909006573, 8.313613574599755, 3.5150335889209474, 4.805804948786787, -3.3056966753903554, -2.6644344062107446], 'G': [-16.404901187801578, 21.59346163650466, 1.4058083342047631, -5.276889954049402, -8.113453005934886, 9.225328286159453, -5.809173297759741, -4.253510380685077, -3.683105640188424], 'H': [-0.3293056231310315, -8.34848250352379, -4.903228442934688, -4.586996096585342, 0.9373596019507827, 0.717589598563748, -4.271917130801591, 9.329191924688228, -3.1121855340623514], 'I': [20.67412975580201, 5.865712858513284, 2.775330928510446, 3.8193511673585383, -2.760304951650123, -2.009496639472548, 4.01384371329947, 0.009658360646291029, -3.9094917073578213], 'L': [17.90249830296106, 3.266667204800458, 11.474198681825081, 6.582298908637981, -1.402727561862133, 
1.8987092316588476, -0.40278169709883255, 0.8865619205386913, 1.6335919603390872], 'K': [-11.693940511387954, -13.41368361464111, 5.7946854980685245, 1.7220399377813238, -6.10214973571984, -2.0258015986571403, -4.5889781225114135, -1.1381017632844042, 1.0363142137598578], 'M': [16.174830528202094, -4.760543431233152, 3.178061357967631, -2.2992528102210685, 6.945059549890805, 1.9684904609094724, -7.026278908301175, 3.450159780310659, -1.6438627376418868], 'F': [19.441052215722625, -0.46427598198954934, -3.988159402687436, 2.5372819350191747, -1.1799767423053011, 4.132039528346374, 0.6080239521217031, 2.4067175076814475, -1.2231610256216308], 'P': [-16.81670112171994, 11.2197599514822, -14.556461364270168, 19.034938348204175, 7.438403478086152, -3.7808532771141357, -3.7542244823536013, 0.07407835444766503, -0.7941886462122623], 'S': [-12.651824913994739, 8.064252481497569, 1.362023891050794, -3.426874923159348, -2.2449411844976463, -2.8830006291924493, 1.7999401599018832, 3.4161538904419695, 6.239889761498372], 'T': [-4.930504019551626, 6.012165420510976, 0.18549285674214566, -1.4234588213812693, -2.6769930771462116, -6.24315445067123, 4.131398732169452, 2.721984911886276, 7.10424330224588], 'W': [17.594701990163117, -8.797000906059079, -13.159774295727534, -0.09629552052011908, 0.8996852240716118, 8.598417612228527, -0.8725223564231394, -6.083014409914517, 8.029039967062687], 'Y': [8.30617085315137, -3.296447711720786, -12.994878467511443, 0.3899788285278774, -7.4344420943197305, 1.9577011691297967, 4.840809392992788, 0.18729583352398815, -2.6771911064073444], 'V': [15.949911456315206, 8.712350824188917, 6.13466624498864, 1.530603817448151, -3.150713711095705, -6.3969239147257655, 4.699987541685387, -1.0774605684751304, -2.6380804104266136]}
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # This file may be used to create an environment using:
2 | # $ conda create --name <env> --file <this file>
3 | # platform: linux-64
4 | _libgcc_mutex=0.1=conda_forge
5 | _openmp_mutex=4.5=1_gnu
6 | _tflow_select=2.3.0=mkl
7 | absl-py=0.9.0=py37_0
8 | astor=0.8.1=py37_0
9 | binutils_impl_linux-64=2.34=h2122c62_9
10 | binutils_linux-64=2.34=h47ac705_27
11 | blinker=1.4=py37_0
12 | brotlipy=0.7.0=py37h7b6447c_1000
13 | c-ares=1.15.0=h7b6447c_1001
14 | ca-certificates=2020.6.20=hecda079_0
15 | cachetools=4.1.1=py_0
16 | certifi=2020.6.20=py37hc8dfbb8_0
17 | cffi=1.14.0=py37h2e261b9_0
18 | chardet=3.0.4=py37_1003
19 | click=7.1.2=pyh9f0ad1d_0
20 | cryptography=3.1=py37h1ba5d50_0
21 | cycler=0.10.0=py_2
22 | flask=1.1.2=pyh9f0ad1d_0
23 | freetype=2.10.2=he06d7ca_0
24 | gast=0.2.2=py37_0
25 | gcc_impl_linux-64=9.3.0=ha2fd2e4_16
26 | gcc_linux-64=9.3.0=h44160b2_27
27 | google-auth=1.20.1=py_0
28 | google-auth-oauthlib=0.4.1=py_2
29 | google-pasta=0.2.0=py_0
30 | grpcio=1.31.0=py37hf8bcb03_0
31 | gxx_impl_linux-64=9.3.0=hde52e87_16
32 | gxx_linux-64=9.3.0=ha9dd585_27
33 | h5py=2.10.0=py37hd6299e0_1
34 | hdf5=1.10.6=hb1b8bf9_0
35 | icu=67.1=he1b5a44_0
36 | idna=2.10=py_0
37 | importlib-metadata=1.7.0=py37_0
38 | itsdangerous=1.1.0=py_0
39 | jansson=2.11=h516909a_1001
40 | jinja2=2.11.2=pyh9f0ad1d_0
41 | joblib=0.16.0=py_0
42 | jpeg=9d=h516909a_0
43 | keras=2.3.1=py37_0
44 | keras-applications=1.0.8=py_1
45 | keras-preprocessing=1.1.0=py_1
46 | kernel-headers_linux-64=2.6.32=h77966d4_13
47 | kiwisolver=1.2.0=py37h99015e2_0
48 | lcms2=2.11=hbd6801e_0
49 | ld_impl_linux-64=2.34=hc38a660_9
50 | libblas=3.8.0=17_openblas
51 | libcblas=3.8.0=17_openblas
52 | libffi=3.2.1=he1b5a44_1007
53 | libgcc-devel_linux-64=9.3.0=hfd08b2a_16
54 | libgcc-ng=9.3.0=h24d8f2e_16
55 | libgfortran-ng=7.5.0=hdf63c60_16
56 | libgomp=9.3.0=h24d8f2e_16
57 | libgpuarray=0.7.6=h14c3975_1003
58 | libiconv=1.16=h516909a_0
59 | liblapack=3.8.0=17_openblas
60 | libopenblas=0.3.10=pthreads_hb3c22a3_4
61 | libpng=1.6.37=hed695b0_2
62 | libprotobuf=3.13.0=hd408876_0
63 | libstdcxx-devel_linux-64=9.3.0=h4084dd6_16
64 | libstdcxx-ng=9.3.0=hdf63c60_16
65 | libtiff=4.1.0=hc7e4089_6
66 | libwebp-base=1.1.0=h516909a_3
67 | libxml2=2.9.10=h68273f3_2
68 | lz4-c=1.9.2=he1b5a44_3
69 | mako=1.1.3=pyh9f0ad1d_0
70 | markdown=3.2.2=py37_0
71 | markupsafe=1.1.1=py37h8f50634_1
72 | matplotlib=3.3.1=1
73 | matplotlib-base=3.3.1=py37hd478181_1
74 | ncurses=6.2=he1b5a44_1
75 | numpy=1.19.1=py37h7ea13bd_2
76 | oauthlib=3.1.0=py_0
77 | olefile=0.46=py_0
78 | openssl=1.1.1g=h516909a_1
79 | opt_einsum=3.1.0=py_0
80 | pandas=1.1.1=py37h3340039_0
81 | pcre=8.44=he1b5a44_0
82 | pillow=7.2.0=py37h718be6c_1
83 | pip=20.2.2=py_0
84 | protobuf=3.13.0=py37hf484d3e_0
85 | pyasn1=0.4.8=py_0
86 | pyasn1-modules=0.2.7=py_0
87 | pycparser=2.20=py_2
88 | pygpu=0.7.6=py37h03ebfcd_1001
89 | pyjwt=1.7.1=py37_0
90 | pyopenssl=19.1.0=py_1
91 | pyparsing=2.4.7=pyh9f0ad1d_0
92 | pysocks=1.7.1=py37_1
93 | python=3.7.8=h6f2ec95_1_cpython
94 | python-dateutil=2.8.1=py_0
95 | python_abi=3.7=1_cp37m
96 | pytz=2020.1=pyh9f0ad1d_0
97 | pyyaml=5.3.1=py37h8f50634_0
98 | readline=8.0=he28a2e2_2
99 | requests=2.24.0=py_0
100 | requests-oauthlib=1.3.0=py_0
101 | rsa=4.6=py_0
102 | scikit-learn=0.23.2=py37h6785257_0
103 | scipy=1.5.2=py37hb14ef9d_0
104 | setuptools=49.6.0=py37hc8dfbb8_0
105 | six=1.15.0=pyh9f0ad1d_0
106 | sqlite=3.33.0=h4cf870e_0
107 | sysroot_linux-64=2.12=h77966d4_13
108 | tensorboard=2.2.1=pyh532a8cf_0
109 | tensorboard-plugin-wit=1.6.0=py_0
110 | tensorflow=2.1.0=mkl_py37h80a91df_0
111 | tensorflow-base=2.1.0=mkl_py37h6d63fb7_0
112 | tensorflow-estimator=2.1.0=pyhd54b08b_0
113 | termcolor=1.1.0=py37_1
114 | theano=1.0.4=py37hf484d3e_1000
115 | threadpoolctl=2.1.0=pyh5ca1d4c_0
116 | tk=8.6.10=hed695b0_0
117 | tornado=6.0.4=py37h8f50634_1
118 | urllib3=1.25.10=py_0
119 | uwsgi=2.0.18=py37h427a7ac_4
120 | werkzeug=1.0.1=pyh9f0ad1d_0
121 | wheel=0.35.1=pyh9f0ad1d_0
122 | wrapt=1.12.1=py37h7b6447c_1
123 | xlrd=1.2.0=pyh9f0ad1d_1
124 | xz=5.2.5=h516909a_1
125 | yaml=0.2.5=h516909a_0
126 | zipp=3.1.0=py_0
127 | zlib=1.2.11=h516909a_1009
128 | zstd=1.4.5=h6597ccf_2
129 |
--------------------------------------------------------------------------------