├── webserver ├── web.md ├── tests.py ├── admin.py ├── apps.py ├── models.py ├── views.py ├── pred_torsion.py ├── rebulid.py └── model.py ├── Training models ├── read.md ├── fetch_top_models.py ├── cross_val_dataset_sep.py ├── coordinates_extraction.py ├── batch_test.py ├── distance_map.py ├── batch_validation.py ├── make_dataset.py └── modelable_assess.py ├── process.jpg ├── rotation.jpg ├── our_process .jpg ├── README.md ├── ncbi_spydier.py ├── extraction.py ├── extract_coord.py ├── computation_rmsd.py ├── transform.py └── angle_computation.ipynb /webserver/web.md: -------------------------------------------------------------------------------- 1 | ##Web source code 2 | -------------------------------------------------------------------------------- /Training models/read.md: -------------------------------------------------------------------------------- 1 | Model training and evaluation and fusion code 2 | -------------------------------------------------------------------------------- /process.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ElvinJun/DeepPBS/HEAD/process.jpg -------------------------------------------------------------------------------- /rotation.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ElvinJun/DeepPBS/HEAD/rotation.jpg -------------------------------------------------------------------------------- /our_process .jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ElvinJun/DeepPBS/HEAD/our_process .jpg -------------------------------------------------------------------------------- /webserver/tests.py: -------------------------------------------------------------------------------- 1 | from django.test import TestCase 2 | 3 | # Create your tests here. 
# Create your models here.
class Files(models.Model):
    """A single uploaded file stored under ``./files``."""

    id = models.AutoField(max_length=10, primary_key=True, verbose_name='id')
    file = models.FileField(upload_to='./files')

    def __str__(self):
        # Django requires a string here; the previous implementation returned
        # a tuple, which raises TypeError whenever the object is displayed.
        return '%s: %s' % (self.id, self.file)

    # Kept so code written against the Python 2 name still works.
    __unicode__ = __str__


class Files_name(models.Model):
    """A named group of uploaded ``Files`` records."""

    id = models.AutoField(max_length=10, primary_key=True, verbose_name='id')
    name = models.CharField(max_length=10)
    files = models.ManyToManyField(Files, related_name='files')

    def __str__(self):
        # ``self.files`` is a related manager, not a value, so only the
        # scalar fields are used for the display string.
        return '%s: %s' % (self.id, self.name)

    # Kept so code written against the Python 2 name still works.
    __unicode__ = __str__
open('./outputs/%s_%d/validation_map_2.txt' % (train_name, subset_index)) as file: 16 | lines = file.readlines() 17 | for line in lines[1::4]: 18 | val_losses.append(float(line.split('=')[1][:-1])) 19 | 20 | total_epochs = len(val_losses) // cross_validation_fold 21 | val_losses = np.array(val_losses).reshape(-1, total_epochs) 22 | 23 | top_num = 5 24 | for subset_index in range(cross_validation_fold): 25 | for top_index in np.argsort(val_losses[subset_index])[:top_num]: 26 | model_path = './outputs/%s_%d/%d_Linear.pth' % (train_name, subset_index, top_index) 27 | new_model_path = os.path.join(top_models_folder, 'model_%d.pth' % (top_index + subset_index * total_epochs)) 28 | shutil.copy(model_path, new_model_path) 29 | -------------------------------------------------------------------------------- /Training models/cross_val_dataset_sep.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import random 4 | import pathlib 5 | 6 | 7 | DATA_PATH = '/share/Data/processed/nr40/bitorsion' 8 | SUBSET_PATH = '/share/Data/processed/nr40/10fold_val_subset' 9 | subset_fold = 10 10 | 11 | 12 | filenames = os.listdir(DATA_PATH) 13 | random.shuffle(filenames) 14 | print('Total file number = %d' % len(filenames)) 15 | 16 | for i in range(subset_fold): 17 | print('subset', i) 18 | train_path = os.path.join(SUBSET_PATH, 'subset_%d/train' % i) 19 | val_path = os.path.join(SUBSET_PATH, 'subset_%d/val' % i) 20 | pathlib.Path(train_path).mkdir(parents=True, exist_ok=True) 21 | pathlib.Path(val_path).mkdir(parents=True, exist_ok=True) 22 | 23 | start_index = i / 10 * len(filenames) // 1 24 | end_index = (i + 1) / 10 * len(filenames) // 1 25 | print('from %d to %d' % (start_index, end_index)) 26 | 27 | for k, filename in enumerate(filenames): 28 | if start_index <= k < end_index: 29 | shutil.copy(os.path.join(DATA_PATH, filename), os.path.join(val_path, filename)) 30 | else: 31 | shutil.copy(os.path.join(DATA_PATH, 
filename), os.path.join(train_path, filename)) 32 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepPBS 2 | **Motivation:** Accurate all-atom protein structures play an important role in various research and applications. However, in most cases, only coarse-grained models can be obtained for various reasons. Precisely predicting protein backbone structures based on alpha-carbon traces, the most-used coarse-grained model, is a pivotal step for precise all-atom modeling of protein structures. 3 | 4 | **Results:** In this study, we proposed a deep learning-based method to predict protein backbone structures from alpha-carbon traces. Our method achieved performance comparable to the best previous method, measured by the cRMSD between predicted coordinates and reference coordinates. 5 | 6 | # Workflow 7 | ![image](https://user-images.githubusercontent.com/46809259/115357912-b3290e00-a1ef-11eb-8b82-0b58706c48a9.png) 8 | 9 | # Webserver 10 | [点击进入骨架结构预测网页](http://deeppbs.com/) 11 | * Python / PyTorch / Django 12 | * KNN / Bi-LSTM / Rodrigues 13 | 14 | 15 | # Protein structure prediction process 16 | ![](https://github.com/ElvinJun/DeepPBS/blob/master/process.jpg?raw=true) 17 | 18 | 19 | # Protein backbone structure prediction based on Bi-LSTM 20 | ![deeppbs](https://github.com/ElvinJun/DeepPBS/blob/master/our_process%20.jpg?raw=true) 21 | 22 | 23 | # Method of rotation repetition 24 | ![rotation](https://github.com/ElvinJun/DeepPBS/blob/master/rotation.jpg?raw=true) 25 | -------------------------------------------------------------------------------- /Training models/coordinates_extraction.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pathlib 3 | import numpy as np 4 | 5 | 6 | def extract_cif(path, filename): 7 | with open(os.path.join(path, filename), 'r') as file: 8 | message = 
# Download the PubMed IDs matching a handful of search terms and write each
# result list to "<term>.txt" in the current working directory, one ID per line.
Entrez.email = 'xxxxxxxxxxx@qq.com'  # always tell who you are

# (search term, retmax) pairs; the output file is named after the term.
# NOTE(review): 'pH response' asks for ten times more records than the other
# terms -- kept as in the original, confirm whether that is intentional.
# NOTE(review): NCBI may cap the number of IDs returned per esearch call well
# below these values -- verify against the current E-utilities limits.
SEARCH_TERMS = [
    ('growth phase', 500000),
    ('stress response', 500000),
    ('acid response', 500000),
    ('pH response', 5000000),
]

for term, retmax in SEARCH_TERMS:
    handle = Entrez.esearch(db="pubmed", term=term, retmax=retmax)
    try:
        record = Entrez.read(handle)
    finally:
        # Release the HTTP handle even when parsing fails.
        handle.close()

    idlist = record["IdList"]
    print(len(idlist))

    # Every file uses the same "<id> \n" line format.  The original wrote
    # '\n ' for the last term, which prefixed each following ID with a space.
    with open(os.path.join(os.getcwd(), term + '.txt'), 'w') as out_file:
        out_file.writelines(pubmed_id + ' \n' for pubmed_id in idlist)
def download(request, filename):
    """Stream a predicted backbone file to the client as an attachment.

    The file is looked up in the ``backbone`` directory that sits next to the
    ``CA_info`` directory created by the most recent upload.

    Args:
        request: The incoming Django request (unused).
        filename: Name of the result file, taken from the URL.

    Returns:
        A streaming response carrying the file with download headers.

    Raises:
        Http404: If nothing has been uploaded yet, if ``filename`` contains
            path components, or if the file does not exist.
    """
    # Imported locally so this function does not depend on edits to the
    # module-level import block.
    from django.http import FileResponse, Http404

    try:
        backbone_dir = SAVED_FILES_DIR.replace('CA_info', 'backbone')
    except NameError:
        # SAVED_FILES_DIR only exists after upload() has run in this process.
        raise Http404('No prediction results are available yet.') from None

    # ``filename`` comes from the URL: refuse anything that is not a bare
    # file name so a request cannot escape the results directory.
    safe_name = os.path.basename(filename)
    if not safe_name or safe_name != filename:
        raise Http404('Invalid file name.')

    file_pathname = os.path.join(backbone_dir, safe_name)
    if not os.path.isfile(file_pathname):
        raise Http404('File not found.')

    # FileResponse reads the file lazily and closes it when the response is
    # finished.  The previous code built the response from ``file.chunks()``
    # after the ``with`` block had already closed the file.
    response = FileResponse(open(file_pathname, 'rb'),
                            content_type='APPLICATION/OCTET-STREAM')
    response['Content-Disposition'] = 'attachment; filename=' + safe_name
    response['Content-Length'] = os.path.getsize(file_pathname)
    # os.unlink(file_pathname)
    return response
# torch.cuda.set_device(0)

# Pick the compute device once at import time.  ``device`` was printed below
# without ever being defined, which raised NameError; fall back to the CPU
# when no GPU is present.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

if torch.cuda.is_available():
    print('GPU available!!!')
print('MainDevice=', device)
super().__init__() 45 | 46 | self.hidden1 = nn.Linear(input_dim, hidden_dim) 47 | self._bn1 = nn.BatchNorm1d(hidden_dim) 48 | 49 | self.hidden2 = nn.Linear(hidden_dim, 2*hidden_dim) 50 | self._bn2 = nn.BatchNorm1d(2*hidden_dim) 51 | 52 | self.hidden3 = nn.Linear(2*hidden_dim, hidden_dim) 53 | self._bn3 = nn.BatchNorm1d(hidden_dim) 54 | 55 | 56 | 57 | self.extract_feature = nn.Linear(hidden_dim, feature_dim) 58 | self._bn4 = nn.BatchNorm1d(feature_dim) 59 | 60 | self.lstm = nn.LSTM(feature_dim, hidden_dim, bidirectional=True) 61 | self._bn5 = nn.BatchNorm1d(2 * hidden_dim) 62 | 63 | # self.sub_net1 = nn.Linear(2 * hidden_dim, hidden_dim) 64 | # self._bn_s1 = nn.BatchNorm1d(hidden_dim) 65 | # self.output1 = nn.Linear(hidden_dim, output_dim) 66 | # 67 | # self.sub_net2 = nn.Linear(2 * hidden_dim, hidden_dim) 68 | # self._bn_s2 = nn.BatchNorm1d(hidden_dim) 69 | # self.output2 = nn.Linear(hidden_dim, output_dim) 70 | 71 | def forward(self, arrays): 72 | hidden1 = swish_fn(self._bn1(self.hidden1(arrays))) 73 | hidden2 = swish_fn(self._bn2(self.hidden2(hidden1))) 74 | hidden3 = swish_fn(self._bn3(self.hidden3(hidden2))) 75 | features = swish_fn(self._bn4(self.extract_feature(hidden3))) 76 | 77 | hidden, _ = self.lstm(features.view(len(features), 1, -1)) 78 | output = swish_fn(self._bn5(hidden.squeeze(1))) 79 | 80 | # sub_hidden1 = swish_fn(self._bn_s1(self.sub_net1(hidden))) 81 | # # sub_hidden1 = F.dropout(sub_hidden1, p=0.5, training=self.training) 82 | # output1 = self.output1(sub_hidden1) 83 | # 84 | # sub_hidden2 = swish_fn(self._bn_s2(self.sub_net2(hidden))) 85 | # # sub_hidden2 = F.dropout(sub_hidden2, p=0.5, training=self.training) 86 | # output2 = self.output1(sub_hidden2) 87 | # 88 | # output = torch.cat([output1, output2], 1) 89 | return output 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /Training models/batch_test.py: -------------------------------------------------------------------------------- 1 | 
class SplitModel(nn.Module):
    """Fully connected feature extractor, bidirectional LSTM and two linear heads.

    ``forward`` returns the two head outputs concatenated along dimension 1,
    i.e. ``2 * output_dim`` values per input row.
    """

    def __init__(self, input_dim, hidden_dim, feature_dim, output_dim):
        super().__init__()

        # Fully connected stack: input -> hidden -> 2*hidden -> hidden,
        # each layer paired with its own batch normalisation.
        self.hidden1 = nn.Linear(input_dim, hidden_dim)
        self._bn1 = nn.BatchNorm1d(hidden_dim)

        self.hidden2 = nn.Linear(hidden_dim, 2*hidden_dim)
        self._bn2 = nn.BatchNorm1d(2*hidden_dim)

        self.hidden3 = nn.Linear(2*hidden_dim, hidden_dim)
        self._bn3 = nn.BatchNorm1d(hidden_dim)

        # Projection to the per-row feature vector fed to the LSTM.
        self.extract_feature = nn.Linear(hidden_dim, feature_dim)
        self._bn4 = nn.BatchNorm1d(feature_dim)

        # Bidirectional, so the LSTM output width is 2 * hidden_dim.
        self.lstm = nn.LSTM(feature_dim, hidden_dim, bidirectional=True)
        self._bn5 = nn.BatchNorm1d(2 * hidden_dim)

        # Two parallel heads on top of the LSTM output.
        self.sub_net1 = nn.Linear(2 * hidden_dim, hidden_dim)
        self.output1 = nn.Linear(hidden_dim, output_dim)

        self.sub_net2 = nn.Linear(2 * hidden_dim, hidden_dim)
        self.output2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, arrays):
        """Run the network on ``arrays`` and return both head outputs concatenated."""
        # Here the activation is applied before batch normalisation.
        hidden1 = self._bn1(swish_fn(self.hidden1(arrays)))
        hidden2 = self._bn2(swish_fn(self.hidden2(hidden1)))
        hidden3 = self._bn3(swish_fn(self.hidden3(hidden2)))
        features = self._bn4(swish_fn(self.extract_feature(hidden3)))

        # Reshape to (rows, 1, feature_dim); with nn.LSTM's default
        # sequence-first layout the rows form one sequence with batch size 1.
        hidden, _ = self.lstm(features.view(len(features), 1, -1))
        hidden = self._bn5(hidden.squeeze(1))

        sub_hidden1 = self.sub_net1(hidden)
        sub_hidden1 = F.dropout(sub_hidden1, p=0.5, training=self.training)
        output1 = self.output1(sub_hidden1)

        sub_hidden2 = self.sub_net2(hidden)
        sub_hidden2 = F.dropout(sub_hidden2, p=0.5, training=self.training)
        # NOTE(review): the second branch goes through ``self.output1`` and
        # ``self.output2`` is never used.  This looks like a copy-paste slip,
        # but saved checkpoints may depend on it -- confirm against the
        # training script before changing.
        output2 = self.output1(sub_hidden2)

        output = torch.cat([output1, output2], 1)
        return output
4 | from numpy import * 5 | from scipy.spatial.distance import pdist 6 | from scipy.spatial.distance import squareform 7 | import pathlib 8 | 9 | 10 | ALPHABET = {'A': 'ALA', 'F': 'PHE', 'C': 'CYS', 'D': 'ASP', 'N': 'ASN', 11 | 'E': 'GLU', 'Q': 'GLN', 'G': 'GLY', 'H': 'HIS', 'L': 'LEU', 12 | 'I': 'ILE', 'K': 'LYS', 'M': 'MET', 'P': 'PRO', 'R': 'ARG', 13 | 'S': 'SER', 'T': 'THR', 'V': 'VAL', 'W': 'TRP', 'Y': 'TYR'} 14 | AA_HYDROPATHICITY_INDEX = {'ARG': -4.5, 'LYS': -3.9, 'ASN': -3.5, 'ASP': -3.5, 'GLN': -3.5, 15 | 'GLU': -3.5, 'HIS': -3.2, 'PRO': -1.6, 'TYR': -1.3, 'TRP': -0.9, 16 | 'SER': -0.8, 'THR': -0.7, 'GLY': -0.4, 'ALA': 1.8, 'MET': 1.9, 17 | 'CYS': 2.5, 'PHE': 2.8, 'LEU': 3.8, 'VAL': 4.2, 'ILE': 4.5} 18 | AA_BULKINESS_INDEX = {'ARG': 14.28, 'LYS': 15.71, 'ASN': 12.82, 'ASP': 11.68, 'GLN': 14.45, 19 | 'GLU': 13.57, 'HIS': 13.69, 'PRO': 17.43, 'TYR': 18.03, 'TRP': 21.67, 20 | 'SER': 9.47, 'THR': 15.77, 'GLY': 3.4, 'ALA': 11.5, 'MET': 16.25, 21 | 'CYS': 13.46, 'PHE': 19.8, 'LEU': 21.4, 'VAL': 21.57, 'ILE': 21.4} 22 | AA_FLEXIBILITY_INDEX = {'ARG': 2.6, 'LYS': 1.9, 'ASN': 14., 'ASP': 12., 'GLN': 4.8, 23 | 'GLU': 5.4, 'HIS': 4., 'PRO': 0.05, 'TYR': 0.05, 'TRP': 0.05, 24 | 'SER': 19., 'THR': 9.3, 'GLY': 23., 'ALA': 14., 'MET': 0.05, 25 | 'CYS': 0.05, 'PHE': 7.5, 'LEU': 5.1, 'VAL': 2.6, 'ILE': 1.6} 26 | AA_MESSAGE = {} 27 | for aa_short in ALPHABET.keys(): 28 | aa_long = ALPHABET[aa_short] 29 | AA_MESSAGE.update({aa_short: [(5.5 - AA_HYDROPATHICITY_INDEX[aa_long]) / 10, 30 | AA_BULKINESS_INDEX[aa_long] / 21.67, 31 | (25. - AA_FLEXIBILITY_INDEX[aa_long]) / 25.]}) 32 | AA_MESSAGE.update({aa_long: [(5.5 - AA_HYDROPATHICITY_INDEX[aa_long]) / 10, 33 | AA_BULKINESS_INDEX[aa_long] / 21.67, 34 | (25. 
# Extract the CA atom records
def atoms_infos(path):
    """Return the alpha-carbon ``ATOM`` records of a PDB file.

    Records are selected by whitespace-splitting each line: the first field
    must be ``ATOM`` and the third ``CA``.  When consecutive CA records carry
    the same value in the sixth field (the residue number in the files this
    was written for), only the one with the larger third-from-last field (the
    occupancy) is kept; on a tie the later record wins.

    Args:
        path: Path of the PDB file to read.

    Returns:
        list[str]: The selected lines, without their trailing newline.

    Raises:
        ValueError: If the occupancy field of a duplicated record is not numeric.
    """
    # The context manager closes the file; the original handle was leaked.
    with open(path, 'r') as file:
        lines = file.readlines()

    atoms_info = []
    for line in lines:
        fields = line.split()
        # The length check keeps blank or short lines from raising IndexError.
        if len(fields) < 3 or fields[0] != 'ATOM' or fields[2] != 'CA':
            continue
        record = line.strip('\n')

        # Drop the lower-occupancy duplicate of a residue.  Comparing only
        # against the previously kept record avoids two defects of the
        # index-based version: the first record being compared with the last
        # one (index -1), and double deletions with three or more duplicates.
        if atoms_info:
            previous = atoms_info[-1].split()
            if previous[5] == fields[5]:
                # Compare occupancies numerically rather than as strings.
                if float(previous[-3]) <= float(fields[-3]):
                    atoms_info[-1] = record
                continue
        atoms_info.append(record)
    # atoms_info = array(atoms_info)
    return atoms_info
order='distance')[:WINDOW_SIZE] 91 | for j in range(len(marked_array)): 92 | aa = marked_array[j][1].decode('utf-8') 93 | new_array.append([marked_array[j][0]] + AA_MESSAGE[aa]) 94 | dist_windows.append(new_array) 95 | dist_windows = np.array(dist_windows).astype('float32') 96 | 97 | np.save(os.path.join(DISTANCE_WINDOW_PATH, save_name), dist_windows) 98 | print('successful') 99 | 100 | if __name__ == "__main__": 101 | atoms_info = atoms_infos(path) 102 | coord_array, acid_array = extract_coord(atoms_info) 103 | distance_window(coord_array, acid_array) 104 | -------------------------------------------------------------------------------- /Training models/distance_map.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from scipy.spatial.distance import pdist 4 | from scipy.spatial.distance import squareform 5 | import pathlib 6 | 7 | 8 | ALPHABET = {'A': 'ALA', 'F': 'PHE', 'C': 'CYS', 'D': 'ASP', 'N': 'ASN', 9 | 'E': 'GLU', 'Q': 'GLN', 'G': 'GLY', 'H': 'HIS', 'L': 'LEU', 10 | 'I': 'ILE', 'K': 'LYS', 'M': 'MET', 'P': 'PRO', 'R': 'ARG', 11 | 'S': 'SER', 'T': 'THR', 'V': 'VAL', 'W': 'TRP', 'Y': 'TYR'} 12 | AA_HYDROPATHICITY_INDEX = {'ARG': -4.5, 'LYS': -3.9, 'ASN': -3.5, 'ASP': -3.5, 'GLN': -3.5, 13 | 'GLU': -3.5, 'HIS': -3.2, 'PRO': -1.6, 'TYR': -1.3, 'TRP': -0.9, 14 | 'SER': -0.8, 'THR': -0.7, 'GLY': -0.4, 'ALA': 1.8, 'MET': 1.9, 15 | 'CYS': 2.5, 'PHE': 2.8, 'LEU': 3.8, 'VAL': 4.2, 'ILE': 4.5} 16 | AA_BULKINESS_INDEX = {'ARG': 14.28, 'LYS': 15.71, 'ASN': 12.82, 'ASP': 11.68, 'GLN': 14.45, 17 | 'GLU': 13.57, 'HIS': 13.69, 'PRO': 17.43, 'TYR': 18.03, 'TRP': 21.67, 18 | 'SER': 9.47, 'THR': 15.77, 'GLY': 3.4, 'ALA': 11.5, 'MET': 16.25, 19 | 'CYS': 13.46, 'PHE': 19.8, 'LEU': 21.4, 'VAL': 21.57, 'ILE': 21.4} 20 | AA_FLEXIBILITY_INDEX = {'ARG': 2.6, 'LYS': 1.9, 'ASN': 14., 'ASP': 12., 'GLN': 4.8, 21 | 'GLU': 5.4, 'HIS': 4., 'PRO': 0.05, 'TYR': 0.05, 'TRP': 0.05, 22 | 'SER': 19., 'THR': 9.3, 'GLY': 
def extract_cif(path, filename):
    """Read alpha-carbon coordinates and residue names from an atom-record text file.

    Each line is split on whitespace.  Lines whose fourth field is ``CA``
    contribute fields 11-13 as x, y, z and the sixth field as the residue name.

    Args:
        path: Directory containing the file.
        filename: Name of the file inside ``path``.

    Returns:
        tuple: ``(ca_coos, seq_array)`` -- a float array with one ``[x, y, z]``
        row per CA record, and an array of the matching residue names.

    Raises:
        IndexError: If a CA record has fewer than 13 fields.
        ValueError: If a coordinate field is not numeric.
    """
    ca_coos = []
    seq_array = []
    with open(os.path.join(path, filename), 'r') as file:
        # Iterate the file directly instead of loading every line up front.
        for line in file:
            fields = line.split()
            # Blank or short lines cannot be CA records; skipping them avoids
            # the IndexError the unconditional ``line[3]`` lookup raised.
            if len(fields) < 4 or fields[3] != 'CA':
                continue
            ca_coos.append([float(fields[10]), float(fields[11]), float(fields[12])])
            seq_array.append(fields[5])
    ca_coos = np.array(ca_coos)
    seq_array = np.array(seq_array)
    return ca_coos, seq_array
def distance_window(coord_array, seq_array=None, save_name=None):
    """Save the CA distance map and the nearest-neighbour distance windows.

    For every residue the ``WINDOW_SIZE`` closest residues (itself included)
    are selected by Euclidean distance, and each is described by
    ``[distance] + AA_MESSAGE[residue_name]``.

    Args:
        coord_array: Array of CA coordinates, one ``[x, y, z]`` row per residue.
        seq_array: Residue names aligned with ``coord_array``.  Defaults to
            the module-level ``seq_test``.
        save_name: Output file name.  Defaults to the module-level
            ``filename`` with ``.cif`` replaced by ``.npy``.

    Side effects:
        Writes the distance map to ``DISTANCE_MAP_PATH`` and the windows to
        ``DISTANCE_WINDOW_PATH``, both as float32 ``.npy`` files.
    """
    # NOTE(review): no call to this function is visible in the dump -- confirm
    # it is actually invoked for each file.
    WINDOW_SIZE = 15
    if seq_array is None:
        seq_array = seq_test
    if save_name is None:
        save_name = filename.replace('.cif', '.npy')

    # Use the argument; the original ignored it and read the global
    # ``ca_coo_test`` instead.
    distCA = pdist(coord_array, metric='euclidean')
    distCA = squareform(distCA).astype('float32')

    np.save(os.path.join(DISTANCE_MAP_PATH, save_name), distCA)

    # Structured dtype so each distance stays attached to its residue name
    # while sorting.
    mark_type = [('distance', float), ('aa', 'S10')]
    dist_windows = []
    for i in range(len(distCA)):
        marked_array = np.array(
            [(distCA[i, j], seq_array[j]) for j in range(len(distCA[i]))],
            dtype=mark_type)
        marked_array = np.sort(marked_array, order='distance')[:WINDOW_SIZE]
        new_array = []
        for distance, aa_bytes in marked_array:
            # Residue names are stored as bytes in the structured array.
            aa = aa_bytes.decode('utf-8')
            new_array.append([distance] + AA_MESSAGE[aa])
        dist_windows.append(new_array)
    dist_windows = np.array(dist_windows).astype('float32')

    np.save(os.path.join(DISTANCE_WINDOW_PATH, save_name), dist_windows)
torch.device('cuda:0') 12 | 13 | batch_size = 1 14 | loss_function_2 = nn.MSELoss() 15 | val_epoch = 200 16 | 17 | 18 | if torch.cuda.is_available(): 19 | print('GPU available!!!') 20 | print('MainDevice=', device) 21 | 22 | 23 | train_name = 'nr40_Split_L1_Drop05' 24 | save_dir = './outputs/' + train_name 25 | val_dir = os.path.join(save_dir, 'val') 26 | 27 | is_cross_validation = True 28 | cross_validation_fold = 10 29 | 30 | 31 | def swish_fn(x): 32 | """ Swish activation function """ 33 | return x * torch.sigmoid(x) 34 | 35 | 36 | class SplitModel(nn.Module): 37 | def __init__(self, input_dim, hidden_dim, feature_dim, output_dim): 38 | super().__init__() 39 | 40 | self.hidden1 = nn.Linear(input_dim, hidden_dim) 41 | self._bn1 = nn.BatchNorm1d(hidden_dim) 42 | 43 | self.hidden2 = nn.Linear(hidden_dim, 2*hidden_dim) 44 | self._bn2 = nn.BatchNorm1d(2*hidden_dim) 45 | 46 | self.hidden3 = nn.Linear(2*hidden_dim, hidden_dim) 47 | self._bn3 = nn.BatchNorm1d(hidden_dim) 48 | 49 | self.extract_feature = nn.Linear(hidden_dim, feature_dim) 50 | self._bn4 = nn.BatchNorm1d(feature_dim) 51 | 52 | self.lstm = nn.LSTM(feature_dim, hidden_dim, bidirectional=True) 53 | self._bn5 = nn.BatchNorm1d(2 * hidden_dim) 54 | 55 | self.sub_net1 = nn.Linear(2 * hidden_dim, hidden_dim) 56 | self.output1 = nn.Linear(hidden_dim, output_dim) 57 | 58 | self.sub_net2 = nn.Linear(2 * hidden_dim, hidden_dim) 59 | self.output2 = nn.Linear(hidden_dim, output_dim) 60 | 61 | def forward(self, arrays): 62 | hidden1 = self._bn1(swish_fn(self.hidden1(arrays))) 63 | hidden2 = self._bn2(swish_fn(self.hidden2(hidden1))) 64 | hidden3 = self._bn3(swish_fn(self.hidden3(hidden2))) 65 | features = self._bn4(swish_fn(self.extract_feature(hidden3))) 66 | 67 | hidden, _ = self.lstm(features.view(len(features), 1, -1)) 68 | hidden = self._bn5(hidden.squeeze(1)) 69 | 70 | sub_hidden1 = self.sub_net1(hidden) 71 | sub_hidden1 = F.dropout(sub_hidden1, p=0.5, training=self.training) 72 | output1 = 
self.output1(sub_hidden1) 73 | 74 | sub_hidden2 = self.sub_net2(hidden) 75 | sub_hidden2 = F.dropout(sub_hidden2, p=0.5, training=self.training) 76 | output2 = self.output1(sub_hidden2) 77 | 78 | output = torch.cat([output1, output2], 1) 79 | return output 80 | 81 | 82 | def validation(model, data_loader): 83 | model.eval() 84 | model.is_training = False 85 | with torch.no_grad(): 86 | loss_sum = 0 87 | 88 | for arrays, torsions, output_filename in data_loader: 89 | torsions = torsions.to(device) 90 | arrays = arrays.to(device) 91 | sincos = torsions[0][2:] 92 | pred_sincos = model(arrays[0]).squeeze(1).transpose(0, 1) 93 | 94 | inner_error = (sincos[:2] - sincos[2:]).abs() 95 | weight = torch.pow(math.e, -inner_error) 96 | loss = torch.add( 97 | torch.add( 98 | (torch.pow((pred_sincos[:2] - sincos[:2]).abs() + 1e-10, weight)).mean(), 99 | (torch.pow((pred_sincos[2:] - sincos[2:]).abs() + 1e-10, weight)).mean()), 100 | torch.sqrt(loss_function_2(pred_sincos[:2], pred_sincos[2:]))) 101 | 102 | loss_sum += float(loss) 103 | return loss_sum 104 | 105 | 106 | def main(): 107 | if is_cross_validation: 108 | for subset_index in range(cross_validation_fold): 109 | val_dataset = DistanceWindow( 110 | distance_window_path='/share/Data/processed/cif_190917/distance_window/', 111 | torsion_path='/share/Data/processed/nr40/10fold_val_subset/subset_%d/val' % subset_index) 112 | val_loader = DataLoader(dataset=val_dataset, pin_memory=True) 113 | 114 | writer = open('./outputs/%s_%d/validation_map.txt' % (train_name, subset_index), 'w') 115 | 116 | for epoch in range(val_epoch): 117 | writer.write('epoch %d\n' % epoch) 118 | val_model = torch.load('./outputs/%s_%d/%d_Linear.pth' % (train_name, subset_index, epoch)).to(device) 119 | 120 | loss_sum = validation(val_model, val_loader) 121 | mean_loss = loss_sum / len(val_dataset) 122 | 123 | writer.write('mean_val_loss=%f\n\n' % mean_loss) 124 | print('epoch %d, mean_val_loss=%f\n' % (epoch, mean_loss)) 125 | 126 | writer.close() 
def collect_result():
    """Rebuild ``validation_map.txt`` for every fold from ``val_loss.txt``.

    The first line starting with ``'v'`` marks the first validation result;
    results are then taken every ``index + 3`` lines.  Each result is written
    as an ``epoch N`` line followed by ``mean_val_loss=<value>``.

    Raises:
        ValueError: if a fold's ``val_loss.txt`` has no line starting with
            ``'v'`` (the original fell through with undefined or stale
            indices in that case).
    """
    for subset_index in range(cross_validation_fold):
        # Read first, so a missing input no longer leaves a truncated output.
        with open('./outputs/%s_%d/val_loss.txt' % (train_name, subset_index), 'r') as file:
            lines = file.readlines()

        result_index = next((i for i, line in enumerate(lines) if line.startswith('v')), None)
        if result_index is None:
            raise ValueError('no validation result line found for subset %d' % subset_index)
        epoch_len = result_index + 3

        with open('./outputs/%s_%d/validation_map.txt' % (train_name, subset_index), 'w') as writer:
            epoch = 0
            for result_line in lines[result_index::epoch_len]:
                writer.write('epoch %d\n' % epoch)
                epoch += 1
                # The value keeps the trailing newline of the source line.
                mean_loss = result_line.split('=')[1]
                writer.write('mean_val_loss=%s\n\n' % mean_loss)
                print('subset_index %d, epoch %d, mean_val_loss=%s\n' % (subset_index, epoch, mean_loss))


if __name__ == '__main__':
    # main()
    collect_result()


# --------------------------------------------------------------------------------
# /webserver/rebulid.py:
# --------------------------------------------------------------------------------
import numpy as np
import math
import os
from scipy.spatial.distance import pdist
from scipy.spatial.distance import squareform
import pathlib
import matplotlib.pyplot as plt

L1_C = 0.5511235634596036
L1_N = 0.5275157666844377
# trans
L2_C_trans = 1.4281242923706199
R_C_trans = 0.5298886988235514
L2_N_trans = 1.4076846053568244
R_N_trans = 0.3797594132360668
L_O_trans = 1.669968615090273
R_O_trans = 1.735878468087069
# cis
L2_C_cis = 0.7914339670632375
R_C_cis = 1.309401495255961
L2_N_cis = 0.7973937679940248
R_N_cis = 1.2349344835918588
L_O_cis = 0.17424337647795887
R_O_cis = 2.384890116717385


class Coordinate(object):
    """A 3-D vector with its norm and, when non-zero, its unit direction."""

    def __init__(self, coo):
        self.coo = coo
        self.x = self.coo[0]
        self.y = self.coo[1]
        self.z = self.coo[2]
        self.len = np.linalg.norm(self.coo)
        # ``orient`` is left undefined for the zero vector; accessing it then
        # raises AttributeError.
        if self.len != 0:
            self.orient = self.coo/self.len


def _clamped_acos(value):
    """Return acos(value) after clamping to [-1, 1] against rounding overshoot."""
    return math.acos(np.clip(value, -1.0, 1.0))


def vec(a, b):
    """Return the vector from ``a`` to ``b`` as a Coordinate."""
    return Coordinate(b.coo - a.coo)


def get_coo(line):
    """Parse whitespace-separated fields 10-12 of ``line`` as x, y, z."""
    items = line.split()
    x = float(items[10])
    y = float(items[11])
    z = float(items[12])
    return Coordinate(np.array([x, y, z]))


def get_coos(lines):
    """Return ``get_coo`` applied to every line."""
    return [get_coo(line) for line in lines]


def read_pn(lines):
    """Read masked atom coordinates from the lines of a ``.pn`` record.

    Lines 27-29 hold tab-separated x, y and z values (divided by 100 here)
    and line 31 the per-residue mask; three atoms are read for every ``'+'``.
    Returns None as soon as an all-zero coordinate is met.
    """
    x = lines[27].split('\t')
    y = lines[28].split('\t')
    z = lines[29].split('\t')
    mask = lines[31]
    atoms_coo = []
    for i in range(len(mask) * 3):
        if mask[i // 3] == '+':
            atoms_coo.append(Coordinate(np.array([float(x[i]) / 100., float(y[i]) / 100., float(z[i]) / 100.])))
            if atoms_coo[-1].len == 0:
                return None
    return atoms_coo


def get_cos(cb, cd):
    """Return the cosine of the angle between two Coordinates."""
    return np.dot(cb.coo, cd.coo)/(cb.len * cd.len)


def get_angle(cb, cd):
    """Return the angle between two Coordinates in radians."""
    return _clamped_acos(get_cos(cb, cd))


def angle_norm(angle):
    """Wrap an angle into atan2's range."""
    return math.atan2(math.sin(angle), math.cos(angle))


def batch_angle_norm(array):
    """Element-wise version of ``angle_norm`` for arrays."""
    return np.arctan2(np.sin(array), np.cos(array))


def get_projection(vector, axis):
    """Return the component of ``vector`` along ``axis``."""
    return Coordinate(vector.len * get_cos(vector, axis) * axis.orient)


def get_sign(vector, axis):
    """Return the component of ``vector`` perpendicular to ``axis``."""
    return Coordinate(vector.coo - get_projection(vector, axis).coo)


# Rotation angle from vector A to vector B about ``axis``.
def get_torsion(vector_A, vector_B, axis):
    """Return the signed rotation angle from ``vector_A`` to ``vector_B``.

    All three arguments are plain arrays (they are passed to ``np.cross``).
    """
    N = Coordinate(np.cross(axis, vector_B))
    N_1 = Coordinate(np.cross(vector_A, axis))
    torsion = np.sign(np.dot(vector_A, N.orient)) * _clamped_acos(np.dot(N_1.orient, N.orient))
    return torsion


def distance_martix(coordinates):
    """Return the square euclidean distance matrix of ``coordinates``."""
    return squareform(pdist(coordinates, metric='euclidean'))


# Angle to the axis and the basis used for the coordinate change.
def torsion_m(vector_A, axis):
    """Return ``(basis, angle)`` for rotating ``vector_A`` about ``axis``.

    ``basis`` has rows ``axis``, ``N_1 x axis`` and ``N_1`` where ``N_1`` is
    the unit normal of ``vector_A`` and ``axis``.  ``angle`` is acos of the
    dot product of the two inputs, which callers pass as unit vectors.
    """
    # Unit normal of the plane spanned by vector_A and axis.
    N_1 = Coordinate(np.cross(vector_A, axis)).orient
    # Basis vectors of the rotated frame.
    m_weight = np.array([axis, np.cross(N_1, axis), N_1])
    angle = _clamped_acos(np.dot(axis, vector_A))
    return m_weight, angle


# Rotate a vector about an axis by a given torsion.
def rotation(vector_A, axis, torsion):
    """Return ``vector_A`` rotated about ``axis`` by ``torsion``."""
    m, angle = torsion_m(vector_A, axis)
    rotation_martix = [math.cos(math.pi-angle),
                       math.sin(math.pi-angle) * math.cos(torsion),
                       math.sin(math.pi-angle) * math.sin(torsion)]

    # Express the rotated vector in the original frame.
    vector_B = np.dot(m.T, rotation_martix)
    return vector_B


def _peptide_parameters(ca_distance):
    """Return (L2_C, L2_N, L_O, R_C, R_N, R_O, is_trans) for a CA-CA distance."""
    if ca_distance > 3.4:
        return L2_C_trans, L2_N_trans, L_O_trans, R_C_trans, R_N_trans, R_O_trans, True
    return L2_C_cis, L2_N_cis, L_O_cis, R_C_cis, R_N_cis, R_O_cis, False


def _place_peptide_atoms(start, end, bond, initial_orient, torsion_C, torsion_N):
    """Return [C, O, N, end CA] coordinates for the peptide from ``start`` to ``end``."""
    axis = bond.orient
    L2_C, L2_N, L_O, R_C, R_N, R_O, is_trans = _peptide_parameters(bond.len)
    if is_trans:
        torsion_N = angle_norm(torsion_N - math.pi)

    side_C = rotation(initial_orient, axis, torsion_C)
    side_N = rotation(initial_orient, axis, torsion_N)
    atom_C = start.coo + L2_C * axis + R_C * side_C
    atom_O = start.coo + L_O * axis + R_O * side_C
    atom_N = end.coo - L2_N * axis + R_N * side_N
    return [atom_C, atom_O, atom_N, end.coo]


def backbone_rebuild_separated_torsion(coos, torsions_C, torsions_N):
    """Rebuild C, O and N positions between consecutive CA atoms.

    ``coos`` is a list of CA Coordinates (at least three).  ``torsions_C[k]``
    and ``torsions_N[k]`` give the rotation of the C/O and N atoms of the
    k-th peptide; the last entry of each is used for the final peptide.
    Peptides with a CA-CA distance above 3.4 use the trans constants, the
    others the cis constants.

    Returns an array with rows CA, then C, O, N, CA for every peptide.
    """
    # coos: coordinates of CA only
    output_coos = [coos[0].coo]

    for k in range(len(coos) - 2):
        CA1 = coos[k]
        CA2 = coos[k + 1]
        CA3 = coos[k + 2]
        CA2CA3 = vec(CA2, CA3)
        CA1CA2 = vec(CA1, CA2)

        # Reference direction: the part of the next CA-CA bond that is
        # perpendicular to the current one.
        initial_orient = get_sign(CA2CA3, CA1CA2).orient
        output_coos += _place_peptide_atoms(CA1, CA2, CA1CA2, initial_orient,
                                            torsions_C[k], torsions_N[k])

    # The last peptide has no following CA, so the previous bond provides
    # the reference direction instead.
    CA1 = coos[-3]
    CA2 = coos[-2]
    CA3 = coos[-1]
    CA2CA3 = vec(CA2, CA3)
    CA2CA1 = vec(CA2, CA1)

    initial_orient = get_sign(CA2CA1, CA2CA3).orient
    output_coos += _place_peptide_atoms(CA2, CA3, CA2CA3, initial_orient,
                                        torsions_C[-1], torsions_N[-1])
    output_coos = np.array(output_coos)
    return output_coos


# --------------------------------------------------------------------------------
# /Training models/make_dataset.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*
import os
import time
import pathlib
import argparse
from process import Process


def _str2bool(value):
    """Parse a command-line boolean; ``type=bool`` treats "False" as True."""
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ('true', 't', 'yes', 'y', '1'):
        return True
    if lowered in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError('expected a boolean, got %r' % value)


parser = argparse.ArgumentParser(description='manual to this script')
parser.add_argument('--resolution', type=int, default='3000',
                    help='output resolution')
# Raw strings keep the Windows defaults byte-identical without relying on
# invalid escape sequences.
parser.add_argument('--dataset_path', type=str, default=r'D:\protein_structure_prediction\data\dataset/casp12_sep',
                    help='path of dataset')
parser.add_argument('--output_path', type=str, default=r'D:\protein_structure_prediction\data\dataset\processed_data',
                    help='path of output')
parser.add_argument('--dataset', type=str, default='training_95',
                    help='name of dataset folder, training_95|bc-30-1_CA|bc-30-1_chains|cif_filtered|cif_fragment|nr90')
parser.add_argument('--input_type', type=str, default='pn',
                    help='type of input file, pn|cif|pdb')
parser.add_argument('--output_type', type=str, default='image',
                    help='output_format, images|distance_map|relocated_coordinate')
parser.add_argument('--axis_range', type=int, default='100',
                    help='map range of structures, 42|64|84')
parser.add_argument('--multi_process', type=_str2bool, default=False,
                    help='multi process or not')
parser.add_argument('--multi_atom', type=_str2bool, default=False,
                    help='input all backbone atoms or CA only')
parser.add_argument('--self_norm_ser_num', type=_str2bool, default=False,
                    help='self normalized serial number')
parser.add_argument('--draw_connection', type=_str2bool, default=True,
                    help='draw dots connection or not')
parser.add_argument('--crop', type=_str2bool, default=True,
                    help='crop image before output')
parser.add_argument('--aminoacid_message', type=_str2bool, default=True,
                    help='mark amino acid with hydropathicity, bulkiness and flexibility or 1.')
parser.add_argument('--z_norm', type=float, default=64.,
                    help='normalize range of z value')
parser.add_argument('--pairs_data', action='store_true', default=False,
                    help='pairs_data')
# The three flags below default to True, so ``store_true`` alone could never
# turn them off; each gets a ``--no_*`` companion writing to the same dest.
parser.add_argument('--test', action='store_true', default=True,
                    help='test mode')
parser.add_argument('--no_test', dest='test', action='store_false',
                    help='disable test mode')
parser.add_argument('--filenames_list', type=str, default='validation_len_under_200.txt',
                    help='read input filenames in list')
parser.add_argument('--sliding_window', action='store_true', default=True,
                    help='save outputs as sliding window')
parser.add_argument('--no_sliding_window', dest='sliding_window', action='store_false',
                    help='do not save outputs as sliding window')
parser.add_argument('--window_reorient', action='store_true', default=True,
                    help='reorientation for normalize every sliding window')
parser.add_argument('--no_window_reorient', dest='window_reorient', action='store_false',
                    help='disable reorientation of sliding windows')
argparses = parser.parse_args()


class MakeDataset(object):
    """Drive ``Process`` over the selected input files and log the settings."""

    def __init__(self, args):
        self.args = args
        self.input_folder = os.path.join(args.dataset_path, args.dataset)
        if args.filenames_list:
            with open(os.path.join(args.dataset_path, args.filenames_list), 'r') as file:
                # Skip blank lines: a trailing newline used to yield an empty
                # filename that was then handed to Process.
                self.filenames = [name for name in file.read().splitlines() if name.strip()]
        else:
            self.filenames = os.listdir(self.input_folder)
        self.output_folders = {}

    def run(self):
        """Process every file into a timestamped folder below the output path."""
        output_folder = os.path.join(self.args.output_path, self.args.dataset, time.strftime("%Y%m%d_%H%M",
                                                                                             time.localtime()))
        log_folder = os.path.join(self.args.output_path, self.args.dataset)
        self.make_folders(output_folder)
        self.write_log(log_folder)
        for filename in self.filenames:
            Process(self.args, filename, self.output_folders)

    def test(self, sample_num=5):
        """Run the data-loader test path on a fixed sample file.

        ``sample_num`` is currently unused; the sample is hard-coded.
        """
        output_folder = self.args.output_path + '/test_sample'
        self.make_folders(output_folder)
        self.write_log(output_folder)
        for filename in ['4KE2_1_A.pn']:  # self.filenames[:sample_num]:
            Process(self.args, filename, self.output_folders).process_for_data_loader_test()

    def make_folders(self, output_folder):
        """Create the output folder(s) and record them in ``output_folders``."""
        self.output_folders.update({'output': output_folder})
        if self.args.pairs_data:
            query_folder = output_folder + '/query'
            target_folder = output_folder + '/target'
            pathlib.Path(query_folder).mkdir(parents=True, exist_ok=True)
            pathlib.Path(target_folder).mkdir(parents=True, exist_ok=True)
            self.output_folders.update({'query': query_folder, 'target': target_folder})
        else:
            pathlib.Path(output_folder).mkdir(parents=True, exist_ok=True)

    def write_log(self, path):
        """Print the main settings and append them to ``path/args_log.txt``."""
        args = self.args
        write_list = [time.strftime("%Y%m%d_%H%M", time.localtime())]
        arg_name_list = ['dataset',
                         'resolution',
                         'input_type',
                         'output_type',
                         'axis_range',
                         'multi_atom',
                         'self_norm_ser_num',
                         'draw_connection',
                         'z_norm']
        for arg_name in arg_name_list:
            entry = "%s = %s" % (arg_name, str(getattr(args, arg_name)))
            print(entry)
            write_list.append(entry)
        write_list.append('\n\n\n')
        with open(path + '/args_log.txt', 'a') as log_writer:
            log_writer.write('\n'.join(write_list))


if __name__ == '__main__':
    if argparses.test:
        MakeDataset(argparses).test()
    else:
        MakeDataset(argparses).run()


# --------------------------------------------------------------------------------
# /extract_coord.py:
# --------------------------------------------------------------------------------
import numpy as np
import math
import os
from numpy import *
from scipy.spatial.distance import pdist
from scipy.spatial.distance import squareform
import pathlib
from model import *
from rebulid import *
path = 'D:\\backbone_prediction'

ALPHABET = {'A': 'ALA', 'F': 'PHE', 'C': 'CYS', 'D': 'ASP', 'N': 'ASN',
            'E': 'GLU', 'Q': 'GLN', 'G': 'GLY', 'H': 'HIS', 'L': 'LEU',
            'I': 'ILE', 'K': 'LYS', 'M': 'MET', 'P': 'PRO', 'R': 'ARG',
            'S': 'SER', 'T': 'THR', 'V': 'VAL', 'W': 'TRP', 'Y': 'TYR'}

AA_HYDROPATHICITY_INDEX = {'ARG': -4.5, 'LYS': -3.9, 'ASN': -3.5, 'ASP': -3.5, 'GLN': -3.5,
                           'GLU': -3.5, 'HIS': -3.2, 'PRO': -1.6, 'TYR': -1.3, 'TRP': -0.9,
                           'SER': -0.8, 'THR': -0.7, 'GLY': -0.4, 'ALA': 1.8, 'MET': 1.9,
                           'CYS': 2.5, 'PHE': 2.8, 'LEU': 3.8, 'VAL': 4.2, 'ILE': 4.5}

AA_BULKINESS_INDEX = {'ARG': 14.28, 'LYS': 15.71, 'ASN': 12.82, 'ASP': 11.68, 'GLN': 14.45,
                      'GLU': 13.57, 'HIS': 13.69, 'PRO': 17.43, 'TYR': 18.03, 'TRP': 21.67,
                      'SER': 9.47, 'THR': 15.77, 'GLY': 3.4, 'ALA': 11.5, 'MET': 16.25,
                      'CYS': 13.46, 'PHE': 19.8, 'LEU': 21.4, 'VAL': 21.57, 'ILE': 21.4}

AA_FLEXIBILITY_INDEX = {'ARG': 2.6, 'LYS': 1.9, 'ASN': 14., 'ASP': 12., 'GLN': 4.8,
                        'GLU': 5.4, 'HIS': 4., 'PRO': 0.05, 'TYR': 0.05, 'TRP': 0.05,
                        'SER': 19., 'THR': 9.3, 'GLY': 23., 'ALA': 14., 'MET': 0.05,
                        'CYS': 0.05, 'PHE': 7.5, 'LEU': 5.1, 'VAL': 2.6, 'ILE': 1.6}

# Three scaled descriptors per residue, reachable by one- and three-letter code.
AA_MESSAGE = {}

for aa_short, aa_long in ALPHABET.items():
    aa_descriptor = [(5.5 - AA_HYDROPATHICITY_INDEX[aa_long]) / 10,
                     AA_BULKINESS_INDEX[aa_long] / 21.67,
                     (25. - AA_FLEXIBILITY_INDEX[aa_long]) / 25.]
    AA_MESSAGE[aa_short] = aa_descriptor
    AA_MESSAGE[aa_long] = list(aa_descriptor)

distance_window_path = os.path.join(path, 'distance_window')

# NOTE(review): this folder is spelled "backbone prediction" (with a space)
# while every other path uses "backbone_prediction" - confirm it is intended.
path_CA = 'D:\\backbone prediction\\CA_info'
atoms_type = ['N', 'CA', 'C', 'O']


def _prefer_second(first_occupancy, second_occupancy):
    """Return True when the first of two duplicate atoms should be dropped."""
    try:
        # Numeric comparison; the original compared the raw strings.
        return float(first_occupancy) <= float(second_occupancy)
    except ValueError:
        return first_occupancy <= second_occupancy


# Extract backbone atom records.
def atoms_infos(file_name):
    """Read the backbone ATOM records of a PDB file below ``path_CA``.

    Returns ``(atoms_info, array_head_tail)``.  ``atoms_info`` lists the ATOM
    lines whose atom name is in ``atoms_type``; where two consecutive records
    share atom name and residue number, the one with the lower third-from-last
    field (assumed to be the occupancy column - TODO confirm) is dropped.
    ``array_head_tail`` holds the coordinates of the first N, the last C and
    the last O atom, in that order.
    """
    with open(os.path.join(path_CA, file_name), 'r') as file:
        lines = file.readlines()

    # Tokenise once; blank or short lines are skipped instead of raising.
    atom_records = []
    for line in lines:
        tokens = line.split()
        if len(tokens) > 2 and tokens[0] == 'ATOM':
            atom_records.append((line, tokens))

    atoms_info = [line.strip('\n') for line, tokens in atom_records if tokens[2] in atoms_type]

    array_head_tail = np.zeros((3, 3))
    for _, tokens in atom_records:
        if tokens[2] == 'N':
            array_head_tail[0] = [float(tokens[j]) for j in range(6, 9)]
            break

    for row, atom_name in ((1, 'C'), (2, 'O')):
        for _, tokens in reversed(atom_records):
            if tokens[2] == atom_name:
                array_head_tail[row] = [float(tokens[j]) for j in range(6, 9)]
                break

    # Drop the less likely of two alternate records of the same atom.  The
    # scan starts at 1 so the first record is not compared with the last,
    # and a set keeps an index from being deleted twice.
    delet = set()
    for i in range(1, len(atoms_info)):
        previous = atoms_info[i - 1].split()
        current = atoms_info[i].split()
        if previous[2] == current[2] and previous[5] == current[5]:
            if _prefer_second(previous[-3], current[-3]):
                delet.add(i - 1)
            else:
                delet.add(i)
    for i in sorted(delet, reverse=True):
        del atoms_info[i]
    return atoms_info, array_head_tail


# Whether broken chains should be completed is still open.
# Extract coordinates.
def extract_coord(atoms_info):
    """Split backbone records into CA coordinates, residue names and all atoms.

    The records are read four per residue with the CA in second place.
    Returns ``(coord_array, acid_array, coord_all)``: CA coordinates, the last
    three characters of each CA record's residue field, and the coordinates
    of every record.
    """
    coord_array = np.zeros((len(atoms_info) // 4, 3))
    coord_all = np.zeros((len(atoms_info), 3))
    acid_list = []
    for i in range(len(atoms_info)):
        tokens = atoms_info[i].split()
        coord_all[i] = [float(tokens[j]) for j in range(6, 9)]
        if i % 4 == 1:
            coord_array[i // 4] = coord_all[i]
            acid_list.append(tokens[3][-3::])
    acid_array = np.array(acid_list)
    return coord_array, acid_array, coord_all


def torsion():
    # NOTE(review): torsion_sin, torsion_cos and torsion_training are not
    # defined in this file; unless a star import provides them this raises
    # NameError when called.
    for n in range(len(torsion_sin)):
        torsion_training[n] = math.atan2(torsion_sin[n], torsion_cos[n])


def distance_window(coord_array, acid_array):
    """Save the nearest-neighbour window features of one structure.

    For every residue the WINDOW_SIZE closest residues (itself included) are
    encoded as ``[distance] + AA_MESSAGE[residue]``.  The result is saved
    below ``distance_window_path`` under the module-level ``file_name`` with
    ``pdb`` replaced by ``npy``.
    """
    WINDOW_SIZE = 15
    distCA = pdist(coord_array, metric='euclidean')
    distCA = squareform(distCA).astype('float32')
    save_name = file_name.replace('pdb', 'npy')
    mark_type = [('distance', float), ('aa', 'S10')]
    dist_windows = []

    for i in range(len(distCA)):
        marked_array = []
        new_array = []
        for j in range(len(distCA[i])):
            marked_array.append((distCA[i, j], acid_array[j]))
        marked_array = np.array(marked_array, dtype=mark_type)
        marked_array = np.sort(marked_array, order='distance')[:WINDOW_SIZE]
        for j in range(len(marked_array)):
            aa = marked_array[j][1].decode('utf-8')
            new_array.append([marked_array[j][0]] + AA_MESSAGE[aa])
        dist_windows.append(new_array)
    dist_windows = np.array(dist_windows).astype('float32')

    np.save(os.path.join(distance_window_path, save_name), dist_windows)
    print('successful')


if __name__ == "__main__":
    # NOTE(review): the nesting of this block was reconstructed from a dump
    # that lost its indentation - verify against the original file.
    # Extract coordinates and compute the distance windows.
    COOR_PATH = 'D:\\backbone_prediction\\coord'
    for file_name in os.listdir(path_CA):
        atoms_info, array_head_tail = atoms_infos(file_name)
        coord_array, acid_array, coord_all = extract_coord(atoms_info)
        distance_window(coord_array, acid_array)
        test_dataset = DistanceWindow(
            distance_window_path='D:/backbone_prediction/distance_window')
        data_loader = DataLoader(dataset=test_dataset)
        np.save(os.path.join(COOR_PATH, file_name.replace('pdb', 'npy')), coord_all)
    # Sum the predictions of all models in ``top_models``.

    models_path = os.path.join(path, 'top_models')
    with torch.no_grad():
        for arrays, torsions, output_filename in data_loader:
            total_file = 0
            for model_name in os.listdir(models_path):
                model = torch.load(os.path.join(models_path, model_name), map_location='cuda:0')
                model.eval()
                model.is_training = False
                arrays = arrays.to(device)
                pred_sincos = model(arrays[0]).squeeze(1).transpose(0, 1)
                output = pred_sincos.data.cpu().numpy()
                total_file += output
            np.save(os.path.join('D:\\backbone_prediction\\eric_rebulid', output_filename[0]), total_file)
            # Rebuild the coordinates from the predicted angles.
            filename = output_filename[0]
            coos = []
            ground_true_coos = np.load(os.path.join(COOR_PATH, filename))
            for coo in ground_true_coos[1::4]:
                coos.append(Coordinate(coo))

            # NOTE(review): PATH_PRED is not defined in this file; presumably
            # it is the 'eric_rebulid' folder written just above - confirm.
            PATH_OUTPUT = PATH_PRED + '_backbone'
            pathlib.Path(PATH_OUTPUT).mkdir(parents=True, exist_ok=True)
            pred = np.load(os.path.join(PATH_PRED, filename))

            torsions_C = np.arctan2(pred[0], pred[1])
            torsions_N = np.arctan2(pred[2], pred[3])

            backbone_pred = backbone_rebuild_separated_torsion(coos, torsions_C, torsions_N)
            backbone_pred = np.concatenate((ground_true_coos[0].reshape([1, 3]),
                                            backbone_pred, ground_true_coos[-2:]), axis=0).astype('float32')
            np.save(os.path.join(PATH_OUTPUT, filename), backbone_pred)


# --------------------------------------------------------------------------------
# /Training models/modelable_assess.py:
# --------------------------------------------------------------------------------
import numpy as np
import math
import os
from scipy.spatial.distance import pdist
from scipy.spatial.distance import squareform
import matplotlib.pyplot as plt


L1_C = 0.5511235634596036
L1_N = 0.5275157666844377
# trans
L2_C_trans = 1.4281242923706199 13 | R_C_trans = 0.5298886988235514 14 | L2_N_trans = 1.4076846053568244 15 | R_N_trans = 0.3797594132360668 16 | L_O_trans = 1.6340680296346668 17 | R_O_trans = 1.7458955685095028 18 | # cis 19 | L2_C_cis = 0.7914339670632375 20 | R_C_cis = 1.309401495255961 21 | L2_N_cis = 0.7973937679940248 22 | R_N_cis = 1.2349344835918588 23 | L_O_cis = 0.17424337647795887 24 | R_O_cis = 2.384890116717385 25 | 26 | 27 | class Coordinate(object): 28 | def __init__(self, coo): 29 | self.coo = coo 30 | self.x = self.coo[0] 31 | self.y = self.coo[1] 32 | self.z = self.coo[2] 33 | self.len = np.linalg.norm(self.coo) 34 | if self.len != 0: 35 | self.orient = self.coo/self.len 36 | 37 | 38 | def vec(a, b): 39 | ab = b.coo - a.coo 40 | return Coordinate(ab) 41 | 42 | 43 | def get_coo(line): 44 | items = line.split() 45 | x = float(items[10]) 46 | y = float(items[11]) 47 | z = float(items[12]) 48 | return Coordinate(np.array([x, y, z])) 49 | 50 | 51 | def get_coos(lines): 52 | atom_coos = [] 53 | for line in lines: 54 | if line.split()[3] != 'CB': 55 | atom_coos.append(get_coo(line)) 56 | # print(line.split()[3], len(atom_coos) % 4) 57 | return atom_coos 58 | 59 | 60 | def read_pn(lines): 61 | x = lines[27].split('\t') 62 | y = lines[28].split('\t') 63 | z = lines[29].split('\t') 64 | mask = lines[31] 65 | atoms_coo = [] 66 | for i in range(len(mask) * 3): 67 | if mask[i // 3] == '+': 68 | atoms_coo.append(Coordinate(np.array([float(x[i]) / 100., float(y[i]) / 100., float(z[i]) / 100.]))) 69 | if atoms_coo[-1].len == 0: 70 | return None 71 | return atoms_coo 72 | 73 | 74 | def get_cos(cb, cd): 75 | cos = np.dot(cb.coo, cd.coo)/(cb.len * cd.len) 76 | return cos 77 | 78 | 79 | def get_angle(cb, cd): 80 | angle = math.acos(get_cos(cb, cd)) 81 | return angle 82 | 83 | 84 | def angle_norm(angle): 85 | normed_angle = math.atan2(math.sin(angle), math.cos(angle)) 86 | return normed_angle 87 | 88 | 89 | def array_angle_norm(array): 90 | normed_array = [] 91 | for 
angle in array: 92 | normed_array.append(angle_norm(angle)) 93 | return np.array(normed_array) 94 | 95 | 96 | def get_projection(vec, axis): 97 | projection = Coordinate(vec.len * get_cos(vec, axis) * axis.orient) 98 | return projection 99 | 100 | 101 | def get_sign(vec, axis): 102 | sign = Coordinate(vec.coo - get_projection(vec, axis).coo) 103 | return sign 104 | 105 | 106 | # 计算以axis为轴,向量A到向量B的旋转角 107 | def torsion(vector_A, vector_B, axis): 108 | N = Coordinate(np.cross(axis, vector_B)) 109 | N_1 = Coordinate(np.cross(vector_A, axis)) 110 | torsion = np.sign(np.dot(vector_A, N.orient)) * math.acos(np.dot(N_1.orient, N.orient)) 111 | return torsion 112 | 113 | 114 | # 计算夹角和坐标转换权重 115 | def torsion_m(vector_A, axis): 116 | #计算法向量 117 | N_1 = Coordinate(np.cross(vector_A,axis)).orient 118 | #旋转基向量 119 | m_weight = np.array([axis , np.cross(N_1,axis) , N_1]) 120 | angle = math.acos(np.dot(axis,vector_A)) 121 | return m_weight, angle 122 | 123 | 124 | # 根据向量,旋转轴 旋转角 计算旋转过后的向量 125 | def rotation(vector_A, axis, torsion): 126 | m, angle = torsion_m(vector_A, axis) 127 | rotation_martix=[math.cos(math.pi-angle), 128 | math.sin(math.pi-angle) * math.cos(torsion), 129 | math.sin(math.pi-angle) * math.sin(torsion)] 130 | 131 | #计算旋转后向量 132 | vector_B = np.dot(m.T, rotation_martix) 133 | return vector_B 134 | 135 | 136 | def distance_martix(A): 137 | # A是一个向量矩阵:euclidean代表欧式距离 138 | distA=pdist(A, metric='euclidean') 139 | # 将distA数组变成一个矩阵 140 | distB = squareform(distA) 141 | return distB 142 | 143 | 144 | def backbone_rebuild_separated_torsion(coos, torsions_C, torsions_N): 145 | # coos: coordinates of CA only 146 | output_coos = [coos[0].coo] 147 | 148 | for k in range(len(coos) - 2): 149 | CA1 = coos[k] 150 | CA2 = coos[k + 1] 151 | CA3 = coos[k + 2] 152 | CA2CA3 = vec(CA2, CA3) 153 | CA1CA2 = vec(CA1, CA2) 154 | 155 | initial_orient = get_sign(CA2CA3, CA1CA2).orient 156 | axis = CA1CA2.orient 157 | torsion_pred_C = torsions_C[k] 158 | torsion_pred_N = torsions_N[k] 
159 | if CA1CA2.len > 3.4: 160 | L2_C, L2_N, L_O, R_C, R_N, R_O = L2_C_trans, L2_N_trans, L_O_trans, R_C_trans, R_N_trans, R_O_trans 161 | torsion_pred_N = angle_norm(torsion_pred_N - math.pi) 162 | else: 163 | L2_C, L2_N, L_O, R_C, R_N, R_O = L2_C_cis, L2_N_cis, L_O_cis, R_C_cis, R_N_cis, R_O_cis 164 | 165 | output_C1 = CA1.coo + L2_C * CA1CA2.orient + R_C * rotation(initial_orient, axis, torsion_pred_C) 166 | output_O1 = CA1.coo + L_O * CA1CA2.orient + R_O * rotation(initial_orient, axis, torsion_pred_C) 167 | output_N2 = CA2.coo - L2_N * CA1CA2.orient + R_N * rotation(initial_orient, axis, torsion_pred_N) 168 | output_coos += [output_C1, output_O1, output_N2, CA2.coo] 169 | 170 | CA1 = coos[-3] 171 | CA2 = coos[-2] 172 | CA3 = coos[-1] 173 | CA2CA3 = vec(CA2, CA3) 174 | CA2CA1 = vec(CA2, CA1) 175 | 176 | initial_orient = get_sign(CA2CA1, CA2CA3).orient 177 | axis = CA2CA3.orient 178 | torsion_pred_C = torsions_C[-1] 179 | torsion_pred_N = torsions_N[-1] 180 | if CA2CA3.len > 3.4: 181 | L2_C, L2_N, L_O, R_C, R_N, R_O = L2_C_trans, L2_N_trans, L_O_trans, R_C_trans, R_N_trans, R_O_trans 182 | torsion_pred_N = angle_norm(torsion_pred_N - math.pi) 183 | else: 184 | L2_C, L2_N, L_O, R_C, R_N, R_O = L2_C_cis, L2_N_cis, L_O_cis, R_C_cis, R_N_cis, R_O_cis 185 | 186 | output_C2 = CA2.coo + L2_C * CA2CA3.orient + R_C * rotation(initial_orient, axis, torsion_pred_C) 187 | output_O2 = CA2.coo + L_O * CA2CA3.orient + R_O * rotation(initial_orient, axis, torsion_pred_C) 188 | output_N3 = CA3.coo - L2_N * CA2CA3.orient + R_N * rotation(initial_orient, axis, torsion_pred_N) 189 | output_coos += [output_C2, output_O2, output_N3, CA3.coo] 190 | output_coos = np.array(output_coos) 191 | return output_coos 192 | 193 | 194 | # PATH = 'D:\protein_structure_prediction\data\dataset/test_set_withO' 195 | PATH = 'D:\protein_structure_prediction\data\dataset/processed_data/test_set\coordinates' 196 | REBUILD_PATH = 
'D:\protein_structure_prediction\data\dataset/processed_data/test_set/rebuild_coordinates' 197 | BITORSION_PATH = 'D:\protein_structure_prediction\data\dataset/processed_data/test_set/bitorsions_' 198 | 199 | 200 | atom_missed_filenames = [] 201 | failed_filenames = [] 202 | filenames = os.listdir(PATH) 203 | # for filename in [filenames[0]]: 204 | for filename in filenames: 205 | print(filename) 206 | try: 207 | gt_coos = np.load(os.path.join(PATH, filename)) 208 | if np.shape(gt_coos)[0] % 4 != 0: 209 | atom_missed_filenames.append(filename) 210 | 211 | else: 212 | torsions_C = [] 213 | torsions_N = [] 214 | coos = [] 215 | for coo in gt_coos: 216 | coos.append(Coordinate(coo)) 217 | 218 | for k in range(len(coos) // 4 - 2): 219 | k *= 4 220 | CA1 = coos[1 + k] 221 | C1 = coos[2 + k] 222 | O1 = coos[3 + k] 223 | N2 = coos[4 + k] 224 | CA2 = coos[5 + k] 225 | CA3 = coos[9 + k] 226 | 227 | CA2CA3 = vec(CA2, CA3) 228 | CA1CA2 = vec(CA1, CA2) 229 | CA2CA1 = vec(CA2, CA1) 230 | CA1C1 = vec(CA1, C1) 231 | CA2N2 = vec(CA2, N2) 232 | 233 | torsions_C.append(torsion(CA2CA3.orient, CA1C1.orient, CA1CA2.orient)) 234 | if CA1CA2.len > 3.4: 235 | torsions_N.append(angle_norm(torsion(CA2CA3.orient, CA2N2.orient, CA1CA2.orient) - math.pi)) 236 | else: 237 | torsions_N.append(torsion(CA2CA3.orient, CA2N2.orient, CA1CA2.orient)) 238 | 239 | k = (len(coos) // 4 - 3) * 4 240 | CA1 = coos[1 + k] 241 | CA2 = coos[5 + k] 242 | C2 = coos[6 + k] 243 | O2 = coos[7 + k] 244 | N3 = coos[8 + k] 245 | CA3 = coos[9 + k] 246 | 247 | CA2CA3 = vec(CA2, CA3) 248 | CA3CA2 = vec(CA3, CA2) 249 | CA2CA1 = vec(CA2, CA1) 250 | CA2C2 = vec(CA2, C2) 251 | CA3N3 = vec(CA3, N3) 252 | 253 | torsions_C.append(torsion(CA2CA1.orient, CA2C2.orient, CA2CA3.orient)) 254 | if CA2CA3.len > 3.4: 255 | torsions_N.append(angle_norm(torsion(CA2CA1.orient, CA3N3.orient, CA2CA3.orient) - math.pi)) 256 | else: 257 | torsions_N.append(torsion(CA2CA1.orient, CA3N3.orient, CA2CA3.orient)) 258 | 259 | torsions_N = 
# Compute the unit vector between two coordinates
def vector_unit(vector_1, vector_2):
    """Return the unit vector pointing from ``vector_2`` towards ``vector_1``.

    Both arguments are 3-D coordinates (anything ``np.asarray`` accepts).
    The result contains NaN when the two points coincide.
    """
    bond_vector = np.asarray(vector_1, dtype=float) - np.asarray(vector_2, dtype=float)
    return bond_vector / np.linalg.norm(bond_vector)


# Compute the plane normals, the local frame and the torsion angle
def torsion_angle(A, B, C, D):
    """Return ``(angle, m_weight)`` for the four points A-B-C-D.

    ``angle`` is the signed dihedral angle (radians, in [-pi, pi]) between
    the planes (A, B, C) and (B, C, D).  ``m_weight`` is a 3x3 array whose
    rows are the local orthonormal frame built on A, B, C: the B->C
    direction, the in-plane perpendicular, and the (A, B, C) plane normal.
    """
    U_2 = vector_unit(B, A)
    U_1 = vector_unit(C, B)
    U = vector_unit(D, C)

    # Unit normals of the two planes.
    normal_bcd = np.cross(U_1, U)
    N = normal_bcd / np.linalg.norm(normal_bcd)
    normal_abc = np.cross(U_2, U_1)
    N_1 = normal_abc / np.linalg.norm(normal_abc)

    m_weight = np.array([U_1, np.cross(N_1, U_1), N_1])

    # atan2 on (sin, cos) instead of sign(...) * acos(...): acos raised a
    # math domain error when rounding pushed the dot product past +-1, and
    # the sign factor collapsed an exactly planar trans geometry to 0.
    sin_angle = np.dot(np.cross(N_1, N), U_1)
    cos_angle = np.dot(N_1, N)
    angle = math.atan2(sin_angle, cos_angle)
    return angle, m_weight


# Place the next atom from a (real or predicted) torsion angle
def next_coord(A, B, C, D, R, angle_confirm, torsion_pred):
    """Return ``(next_coordinate, torsion)`` for the atom bonded to ``C``.

    The new atom lies at distance ``R`` from ``C``; ``angle_confirm`` is the
    bond angle at ``C`` and ``torsion_pred`` the torsion used to place it,
    both in radians.  ``D`` only feeds ``torsion_angle``; the local frame
    that is actually used depends on A, B and C.
    """
    _, m = torsion_angle(A, B, C, D)
    # The supplied (real or predicted) angle is used as the torsion.
    torsion = torsion_pred
    supplement = math.pi - angle_confirm
    angle_martix = [math.cos(supplement),
                    math.sin(supplement) * math.cos(torsion),
                    math.sin(supplement) * math.sin(torsion)]
    # Express the local displacement in the global frame.
    next_corrd = C + R * np.dot(m.T, angle_martix)
    return next_corrd, torsion


# Predict the CB position from the backbone atoms
def pred_CBcoord(N_coord, CA_coord, C_coord, CB_coord):
    """Return the CB coordinate rebuilt from N, CA and C.

    ``CB_coord`` is only forwarded to ``next_coord`` as its fourth point.
    The numeric constants are the statistics hard-coded by the original
    author (offset along the N/C bisector, distance and torsion).
    """
    # Bisector of the CA->N and CA->C directions.
    vector_midleline = (vector_unit(N_coord, CA_coord) + vector_unit(C_coord, CA_coord))
    vector_midleline_unit = vector_midleline / np.linalg.norm(vector_midleline)

    C = CA_coord + vector_midleline_unit * 0.841829775235248
    angle_confirm = math.pi / 2

    R = 2.1545175870366853  # distance obtained from statistics
    torsion = 0.5999114448494303  # torsion obtained from statistics

    next_CB_coord, _ = next_coord(CA_coord, N_coord, C, CB_coord, R, angle_confirm, torsion)
    return next_CB_coord


# Extract the requested atom records from the real pdb file
def extract_info_from_pdb(path_file_real):
    """Return the ATOM lines of ``path_file_real`` whose atom name is in
    the module-level ``atoms_type``, with alternate locations removed.

    When two consecutive records share atom name and residue number, the
    one with the lower occupancy (third field from the end) is dropped;
    on a tie the earlier record is dropped.
    """
    real = []
    with open(path_file_real, 'r') as f_real:
        for line in f_real:
            fields = line.split()
            # Blank or truncated lines used to raise IndexError.
            if len(fields) > 2 and fields[0] == 'ATOM' and fields[2] in atoms_type:
                real.append(line)

    # Start at 1 so the first record is never compared with the last one,
    # and collect indices in a set so none is deleted twice.
    to_delete = set()
    for i in range(1, len(real)):
        previous = real[i - 1].split()
        current = real[i].split()
        if previous[2] == current[2] and previous[5] == current[5]:
            try:
                drop_previous = float(previous[-3]) <= float(current[-3])
            except ValueError:
                # Non-numeric field: fall back to the textual comparison.
                drop_previous = previous[-3] <= current[-3]
            to_delete.add(i - 1 if drop_previous else i)
    return [line for i, line in enumerate(real) if i not in to_delete]
# Extract the matching atom coordinates from the predicted structure
def extract_info_from_pred(CB_whether_exist, path_pred):
    """Return ``(pred_array, pred_array_without_CB, pred_CB)`` for a prediction.

    ``CB_whether_exist`` holds one '+' / '-' flag per residue.  A path
    ending in ``.pdb`` is read as text from the ``pd2_out`` counterpart of
    ``path_pred``; anything else is loaded with ``np.load`` from the
    ``our_out`` counterpart, and the CB atoms are rebuilt with
    ``pred_CBcoord``.  Relies on the module-level ``atoms_type``.
    """
    n_with_cb = CB_whether_exist.count('+')
    n_without_cb = CB_whether_exist.count('-')
    length_file = n_with_cb * len(atoms_type) + n_without_cb * (len(atoms_type) - 1)
    all_atoms = {'N': 0, 'CA': 1, 'C': 2, 'O': 3}

    if path_pred.endswith('.pdb'):
        # pdb input
        path_pred = path_pred.replace('real', 'pd2_out')
        gen = []
        # ``with`` closes the handle even if a line fails to parse.
        with open(path_pred, 'r') as f_gen:
            for line in f_gen:
                fields = line.split()
                # NOTE(review): only CB records are kept, so the ``else``
                # branch below can never run and pred_array_without_CB stays
                # all zeros.  This looks like it was meant to be
                # ``fields[2] in atoms_type`` - confirm before changing.
                if len(fields) > 2 and fields[0] == 'ATOM' and fields[2] == 'CB':
                    gen.append(line)

        # All coordinates go to pred_array, CB coordinates to pred_CB.
        pred_array = np.zeros((length_file, 3))
        pred_CB = np.zeros((n_with_cb, 3))
        pred_array_without_CB = np.zeros((length_file - n_with_cb, 3))
        count = 0
        count_CB = 0
        print(length_file, len(gen), len(CB_whether_exist))
        for i, record in enumerate(gen):
            fields = record.split()
            xyz = np.array([float(fields[j]) for j in range(6, 9)])
            pred_array[i] = xyz
            if fields[2] == 'CB':
                pred_CB[count_CB] = xyz
                count_CB += 1
            else:
                pred_array_without_CB[count] = xyz
                count += 1
        return pred_array, pred_array_without_CB, pred_CB

    # npy input
    path_pred = path_pred.replace('real', 'our_out')
    number = [all_atoms[atom] for atom in atoms_type[:-1]]
    pred_npy_without_CB = np.load(path_pred)
    pred_CB = np.zeros((n_with_cb, 3))
    pred_array = []
    pred_array_without_CB = []
    count = 0
    last_slot = int(number[-1])
    for j in range(pred_npy_without_CB.shape[0]):
        # Act once per group of four rows, on the row of the last requested atom.
        if j % 4 != last_slot:
            continue
        base = j - number[-1]
        for offset in number:
            pred_array.append(pred_npy_without_CB[base + offset])
            pred_array_without_CB.append(pred_npy_without_CB[base + offset])

        if CB_whether_exist[j // 4] == '+':
            # Rebuild CB from the first three rows of the group; the fourth
            # row is only forwarded as the unused CB argument.
            next_CB = pred_CBcoord(pred_npy_without_CB[base],
                                   pred_npy_without_CB[base + 1],
                                   pred_npy_without_CB[base + 2],
                                   pred_npy_without_CB[base + 3])
            pred_CB[count] = np.array(next_CB)
            pred_array.append(next_CB)
            count += 1
    return np.array(pred_array), np.array(pred_array_without_CB), pred_CB
# real: reference coordinates, pred: generated coordinates
def computation_rmsd(real, pred, center=False):
    """Return the minimum RMSD between ``real`` and ``pred`` over rotations.

    Uses the quaternion eigenvalue method on the 4x4 symmetric key matrix.
    By default the rotation is about the origin, i.e. the coordinates are
    NOT translated first (the original behaviour); pass ``center=True`` to
    subtract each set's centroid beforehand.  Only the first ``len(real)``
    rows of ``pred`` are used.  Returns NaN for empty input.

    Raises:
        ValueError: if ``pred`` has fewer points than ``real``.
    """
    real_xyz = np.asarray(real, dtype=float).reshape(-1, 3)
    pred_xyz = np.asarray(pred, dtype=float).reshape(-1, 3)
    n_points = len(real_xyz)
    if n_points == 0:
        return float('nan')
    if len(pred_xyz) < n_points:
        raise ValueError('pred has %d points but real has %d' % (len(pred_xyz), n_points))
    pred_xyz = pred_xyz[:n_points]

    if center:
        real_xyz = real_xyz - real_xyz.mean(axis=0)
        pred_xyz = pred_xyz - pred_xyz.mean(axis=0)

    GA = np.sum(real_xyz ** 2)
    GB = np.sum(pred_xyz ** 2)

    # S[i][j] = sum(pred_i * real_j), e.g. S[0][1] is the original Sxy.
    S = np.dot(pred_xyz.T, real_xyz)
    Sxx, Sxy, Sxz = S[0]
    Syx, Syy, Syz = S[1]
    Szx, Szy, Szz = S[2]

    # Build the symmetric key matrix.
    K = np.array([
        [Sxx + Syy + Szz, Syz - Szy, Szx - Sxz, Sxy - Syx],
        [Syz - Szy, Sxx - Syy - Szz, Sxy + Syx, Szx + Sxz],
        [Szx - Sxz, Sxy + Syx, -Sxx + Syy - Szz, Syz + Szy],
        [Sxy - Syx, Szx + Sxz, Syz + Szy, -Sxx - Syy + Szz],
    ])

    # K is symmetric: eigvalsh returns real eigenvalues in ascending order.
    u = np.linalg.eigvalsh(K)[-1]

    # abs() guards against a tiny negative value caused by rounding.
    return np.sqrt(abs(GA + GB - 2 * u) / n_points)


# Compute the matrix of all rmsd values
def computation_rmsd_array(pred_end, sheet):
    """Return the per-file RMSD table for every pdb under ``./real``.

    Columns are one per atom in ``atoms_type[:-1]``, then CB, then all
    atoms; a final row holds the column means.  File names (and the
    'mean' label) are written to column 0 of ``sheet``.  Relies on the
    module-level ``atoms_type`` and on ``extraction_coord``.

    Raises:
        ValueError: if ``pred_end`` ends with neither 'pdb' nor 'npy'.
    """
    if pred_end.endswith('pdb'):
        pred_suffix = '_out.pdb'
    elif pred_end.endswith('npy'):
        pred_suffix = '.npy'  # suffix of the files compared with the real set
    else:
        raise ValueError("pred_end must end with 'pdb' or 'npy', got %r" % (pred_end,))

    atoms = atoms_type[0:-1]
    path_file = os.path.join(os.getcwd(), 'real')
    file_names = os.listdir(path_file)
    # Known-problematic pdb is skipped; tolerate its absence.
    if '4avz.pdb' in file_names:
        file_names.remove('4avz.pdb')

    stride = len(atoms_type) - 1
    cb_column = len(atoms)
    all_column = len(atoms) + 1
    rmsd_array = np.zeros((len(file_names), len(atoms) + 2))

    for idx, file_real in enumerate(file_names, start=1):
        sheet.write(idx, 0, file_real)

        path_real = os.path.join(path_file, file_real)
        path_pred = path_real.replace('.pdb', pred_suffix)

        real, pred, real_without_CB, pred_without_CB, real_CB, pred_CB = \
            extraction_coord(path_real, path_pred)

        # Per-atom rmsd
        for row in range(len(atoms)):
            rmsd_array[idx - 1][row] = computation_rmsd(real_without_CB[row::stride],
                                                        pred_without_CB[row::stride])

        # CB rmsd: compare against the predicted CB atoms (the backbone
        # array was passed here before, which compared unrelated atoms).
        rmsd_array[idx - 1][cb_column] = computation_rmsd(real_CB, pred_CB)
        # All-atom rmsd
        rmsd_array[idx - 1][all_column] = computation_rmsd(real, pred)

    sheet.write(len(file_names) + 1, 0, 'mean')
    means = np.mean(rmsd_array, axis=0)
    return np.insert(rmsd_array, rmsd_array.shape[0], values=means, axis=0)
288 | if pred_end.endswith('pdb'): 289 | sheet = book.add_sheet('PD2', cell_overwrite_ok=True) 290 | elif pred_end.endswith('npy'): 291 | sheet = book.add_sheet('our', cell_overwrite_ok=True) 292 | 293 | atoms_type = input('请按pdb原子排列顺序输入需要计算的原子(逗号隔开):') 294 | if atoms_type == '': 295 | atoms_type= ['N', 'CA', 'C', 'O', 'CB'] 296 | else: 297 | atoms_type = atoms_type.split(",") 298 | # 写入每一列的title 299 | names = ['file_name'] + atoms_type + ['scut'] 300 | for i in range(len(names)): 301 | sheet.write(0, i, names[i]) 302 | 303 | #获取所有计算数值的rmsd 304 | rmsd_array = computation_rmsd_array(pred_end, sheet) 305 | 306 | #将矩阵数据写入excel 307 | for i in range(rmsd_array.shape[0]): 308 | for j in range(rmsd_array.shape[1]): 309 | sheet.write(i+1, j+1, rmsd_array[i][j]) 310 | 311 | if pred_end.endswith('pdb'): 312 | book.save('D://database//rmsd_compare//backbone_PD2.xls') 313 | elif pred_end.endswith('npy'): 314 | book.save('D://database//rmsd_compare//backbone1_our.xls') 315 | 316 | 317 | 318 | -------------------------------------------------------------------------------- /webserver/model.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import os 4 | from numpy import * 5 | from scipy.spatial.distance import pdist 6 | from scipy.spatial.distance import squareform 7 | import pathlib 8 | from pred_torsion import * 9 | from rebulid import * 10 | import shutil 11 | import time 12 | import random 13 | import sys 14 | 15 | # path = 'D:\\backbone_prediction' 16 | # distance_window_path = os.path.join(path, 'distance_window') 17 | # path_CA = sys.argv[1] 18 | # logger.info(path_CA) 19 | # path_CA = '/data/wwwroot/webserver/files/2019-11-19-07-20-19/CA_info' 20 | path_CA = 'D:\\backbone_prediction\\CA_infoglp' 21 | distance_window_path = path_CA.replace('CA_infoglp', 'distance_window_test1') 22 | pathlib.Path(distance_window_path).mkdir(parents=True, exist_ok=True) 23 | atoms_type = ['N', 'CA', 'C', 'O'] 24 | 
25 | acid_normol = ['ALA', 'PHE', 'CYS', 'ASP', 'ASN', 26 | 'GLU', 'GLN', 'GLY', 'HIS', 'LEU', 27 | 'ILE', 'LYS', 'MET', 'PRO', 'ARG', 28 | 'SER', 'THR', 'VAL', 'TRP', 'TYR'] 29 | 30 | ALPHABET = {'A': 'ALA', 'F': 'PHE', 'C': 'CYS', 'D': 'ASP', 'N': 'ASN', 31 | 'E': 'GLU', 'Q': 'GLN', 'G': 'GLY', 'H': 'HIS', 'L': 'LEU', 32 | 'I': 'ILE', 'K': 'LYS', 'M': 'MET', 'P': 'PRO', 'R': 'ARG', 33 | 'S': 'SER', 'T': 'THR', 'V': 'VAL', 'W': 'TRP', 'Y': 'TYR'} 34 | 35 | AA_HYDROPATHICITY_INDEX = {'ARG': -4.5, 'LYS': -3.9, 'ASN': -3.5, 'ASP': -3.5, 'GLN': -3.5, 36 | 'GLU': -3.5, 'HIS': -3.2, 'PRO': -1.6, 'TYR': -1.3, 'TRP': -0.9, 37 | 'SER': -0.8, 'THR': -0.7, 'GLY': -0.4, 'ALA': 1.8, 'MET': 1.9, 38 | 'CYS': 2.5, 'PHE': 2.8, 'LEU': 3.8, 'VAL': 4.2, 'ILE': 4.5} 39 | 40 | AA_BULKINESS_INDEX = {'ARG': 14.28, 'LYS': 15.71, 'ASN': 12.82, 'ASP': 11.68, 'GLN': 14.45, 41 | 'GLU': 13.57, 'HIS': 13.69, 'PRO': 17.43, 'TYR': 18.03, 'TRP': 21.67, 42 | 'SER': 9.47, 'THR': 15.77, 'GLY': 3.4, 'ALA': 11.5, 'MET': 16.25, 43 | 'CYS': 13.46, 'PHE': 19.8, 'LEU': 21.4, 'VAL': 21.57, 'ILE': 21.4} 44 | 45 | AA_FLEXIBILITY_INDEX = {'ARG': 2.6, 'LYS': 1.9, 'ASN': 14., 'ASP': 12., 'GLN': 4.8, 46 | 'GLU': 5.4, 'HIS': 4., 'PRO': 0.05, 'TYR': 0.05, 'TRP': 0.05, 47 | 'SER': 19., 'THR': 9.3, 'GLY': 23., 'ALA': 14., 'MET': 0.05, 48 | 'CYS': 0.05, 'PHE': 7.5, 'LEU': 5.1, 'VAL': 2.6, 'ILE': 1.6} 49 | 50 | AA_MESSAGE = {} 51 | 52 | for aa_short in ALPHABET.keys(): 53 | aa_long = ALPHABET[aa_short] 54 | AA_MESSAGE.update({aa_short: [(5.5 - AA_HYDROPATHICITY_INDEX[aa_long]) / 10, 55 | AA_BULKINESS_INDEX[aa_long] / 21.67, 56 | (25. - AA_FLEXIBILITY_INDEX[aa_long]) / 25.]}) 57 | 58 | AA_MESSAGE.update({aa_long: [(5.5 - AA_HYDROPATHICITY_INDEX[aa_long]) / 10, 59 | AA_BULKINESS_INDEX[aa_long] / 21.67, 60 | (25. 
# Extract the CA atom records
def atoms_infos(file_name):
    """Return ``(atoms_info, array_head_tail)`` for a pdb file under ``path_CA``.

    ``atoms_info`` is the list of CA ``ATOM`` lines (newline stripped) with
    alternate locations removed.  ``array_head_tail`` is a (5, 3) array of
    terminal backbone coordinates: row 0 first N, row 1 last C, row 2 last O,
    row 3 first C, row 4 last N; a row stays zero when the atom is absent.
    """
    # ``with`` closes the handle; it was previously left open.
    with open(os.path.join(path_CA, file_name), 'r') as file:
        lines = file.readlines()

    # Parse once; blank or truncated lines used to raise IndexError.
    records = []
    for line in lines:
        fields = line.split()
        if len(fields) > 2 and fields[0] == 'ATOM':
            records.append((line, fields))

    atoms_info = [line.strip('\n') for line, fields in records if fields[2] == 'CA']

    array_head_tail = np.zeros((5, 3))
    # (row, atom name, search from the end of the file?)
    wanted = [(0, 'N', False), (1, 'C', True), (2, 'O', True), (3, 'C', False), (4, 'N', True)]
    for row, atom_name, from_end in wanted:
        ordered = reversed(records) if from_end else records
        for _, fields in ordered:
            if fields[2] == atom_name:
                array_head_tail[row] = [float(fields[j]) for j in range(6, 9)]
                break

    # Drop the lower-occupancy record of consecutive duplicates.  Start at 1
    # so the first record is never compared with the last one, and use a set
    # so the same index cannot be deleted twice.
    to_delete = set()
    for i in range(1, len(atoms_info)):
        previous = atoms_info[i - 1].split()
        current = atoms_info[i].split()
        if previous[2] == current[2] and previous[5] == current[5]:
            try:
                drop_previous = float(previous[-3]) <= float(current[-3])
            except ValueError:
                # Non-numeric field: fall back to the textual comparison.
                drop_previous = previous[-3] <= current[-3]
            to_delete.add(i - 1 if drop_previous else i)
    atoms_info = [line for i, line in enumerate(atoms_info) if i not in to_delete]
    return atoms_info, array_head_tail
def torsion():
    """Convert the sin/cos pairs into angles, in place.

    NOTE(review): ``torsion_sin``, ``torsion_cos`` and ``torsion_training``
    are not defined in the visible code; presumably they come from one of
    the star imports - confirm before calling.
    """
    for n in range(len(torsion_sin)):
        torsion_training[n] = math.atan2(torsion_sin[n], torsion_cos[n])


def distance_window(coord_array, acid_array, i, window_size=15):
    """Build, save and return the nearest-neighbour feature windows.

    For every point in ``coord_array`` the ``window_size`` closest points
    (itself included) are kept, each described by its distance followed by
    the ``AA_MESSAGE`` entry of the matching item of ``acid_array``.  The
    float32 result is saved as ``<i>.npy`` under ``distance_window_path``
    and also returned.
    """
    dist_ca = squareform(pdist(coord_array, metric='euclidean')).astype('float32')
    save_name = str(i) + '.npy'
    mark_type = [('distance', float), ('aa', 'S10')]
    dist_windows = []

    # ``row_index`` avoids shadowing the ``i`` parameter as the old loop did.
    for row_index in range(len(dist_ca)):
        marked = np.array([(dist_ca[row_index, j], acid_array[j])
                           for j in range(len(dist_ca[row_index]))],
                          dtype=mark_type)
        nearest = np.sort(marked, order='distance')[:window_size]
        window = []
        for entry in nearest:
            aa = entry[1].decode('utf-8')
            window.append([entry[0]] + AA_MESSAGE[aa])
        dist_windows.append(window)
    dist_windows = np.array(dist_windows).astype('float32')

    np.save(os.path.join(distance_window_path, save_name), dist_windows)
    return dist_windows


# Compute the unit vector between two coordinates
def vector_unit(vector_1, vector_2):
    """Return the unit vector pointing from ``vector_2`` towards ``vector_1``."""
    bond_vector = vector_1 - vector_2
    return bond_vector / np.linalg.norm(bond_vector)


def torsion_angle(A, B, C):
    """Return the 3x3 local frame built on the points A, B, C.

    Rows are the B->C direction, the in-plane perpendicular and the
    normal of the (A, B, C) plane.
    """
    U_2 = vector_unit(B, A)
    U_1 = vector_unit(C, B)
    plane_normal = np.cross(U_2, U_1)
    N_1 = plane_normal / np.linalg.norm(plane_normal)
    return np.array([U_1, np.cross(N_1, U_1), N_1])


# Place the next atom from a (real or predicted) torsion angle
def next_coord(A, B, C, R, angle_confirm, torsion_pred):
    """Return the coordinate of the atom bonded to ``C``.

    The atom lies at distance ``R`` from ``C`` with bond angle
    ``angle_confirm`` and torsion ``torsion_pred`` (radians), expressed in
    the local frame of A, B, C.
    """
    m = torsion_angle(A, B, C)
    torsion = torsion_pred
    supplement = math.pi - angle_confirm
    angle_martix = [math.cos(supplement),
                    math.sin(supplement) * math.cos(torsion),
                    math.sin(supplement) * math.sin(torsion)]
    # Express the local displacement in the global frame.
    return C + R * np.dot(m.T, angle_martix)


# Predict the CB position
def pred_CBcoord(N_coord, CA_coord, C_coord, CB_coord):
    """Return the CB coordinate rebuilt from N, CA and C.

    ``CB_coord`` is accepted for signature compatibility and is not used.
    The numeric constants are the statistics hard-coded by the original
    author (offset along the N/C bisector, distance and torsion).
    """
    # Bisector of the CA->N and CA->C directions.
    vector_midleline = (vector_unit(N_coord, CA_coord) + vector_unit(C_coord, CA_coord))
    vector_midleline_unit = vector_midleline / np.linalg.norm(vector_midleline)

    C = CA_coord + vector_midleline_unit * 0.841829775235248
    angle_confirm = math.pi / 2

    R = 2.1545175870366853  # distance obtained from statistics
    torsion = 0.5999114448494303  # torsion obtained from statistics

    return next_coord(CA_coord, N_coord, C, R, angle_confirm, torsion)
pred_npy_without_CB[j + 3] 231 | # 根据预测出的C和N计算得到CB的坐标 232 | next_CB = pred_CBcoord(N_coord_pred, CA_coord_pred, C_coord_pred, O_coord_pred) 233 | pred_array.append(next_CB) 234 | return array(pred_array) 235 | 236 | 237 | def recovery_infos(pred_array, CA_infos, backbone_path): 238 | # pred_array = np.load(pred_array1) 239 | # CA_info = atoms_infos(pred_array1.split(".")[0]+".pdb") 240 | 241 | # after_work = open(pred_array1.split(".")[0] + "1" + ".pdb", "w") 242 | backbone = open(backbone_path, 'w') 243 | # 完成pdb的框架 244 | list1 = [] 245 | for i in range(len(CA_infos)): 246 | if CA_infos[i].split()[3] != "GLY": 247 | for j in range(5): 248 | list1.append(CA_infos[i]) 249 | else: 250 | for j in range(4): 251 | list1.append(CA_infos[i]) 252 | 253 | # 命名N\C\O\CB 254 | i = 0 255 | while i < len(list1) - 3: 256 | if list1[i].split()[3] == "GLY": 257 | list1[i] = list1[i].replace(list1[i].split()[2], "N ") 258 | list1[i + 2] = list1[i + 2].replace(list1[i + 2].split()[2], "C ") 259 | list1[i + 3] = list1[i + 3].replace(list1[i + 3].split()[2], "O ") 260 | i = i + 4 261 | 262 | else: 263 | list1[i] = list1[i].replace(list1[i].split()[2], "N ") 264 | list1[i + 2] = list1[i + 2].replace(list1[i + 2].split()[2], "C ") 265 | list1[i + 3] = list1[i + 3].replace(list1[i + 3].split()[2], "O ") 266 | list1[i + 4] = list1[i + 4].replace(list1[i + 4].split()[2], "CB") 267 | i = i + 5 268 | 269 | # 将npy的数据取三位小数 270 | for i in range(len(pred_array)): 271 | for j in range(3): 272 | pred_array[i][j] = "%.3f" % pred_array[i][j] 273 | 274 | # 坐标替换及补齐小数点位数 275 | for i in range(len(list1)): 276 | for j in range(3): 277 | if len(str(pred_array[i][j]).split(".")[1]) < 3: 278 | list1[i] = list1[i].replace(list1[i].split()[j + 6], str(pred_array[i][j]).split(".")[0] + "." 
+ \ 279 | str(pred_array[i][j]).split(".")[1].ljust(3, '0')) 280 | else: 281 | list1[i] = list1[i].replace(list1[i].split()[j + 6], str(pred_array[i][j])) 282 | 283 | # 最后一项原子名称修改 284 | list1[i] = list1[i].replace(list1[i].split()[11], list1[i].split()[2][0]) 285 | 286 | # 序号与格式 287 | t = list1[i].split() 288 | list1[i] = t[0].ljust(7, ' ') + str(i + 1).rjust(4, ' ') + " " + t[2].ljust(3, ' ') + t[3].rjust(4, 289 | ' ') + " " + \ 290 | t[4].ljust(2, ' ') + t[5].rjust(3, ' ') + t[6].rjust(12, ' ') + t[7].rjust(8, ' ') + t[8].rjust(8, 291 | ' ') + \ 292 | " " + t[9].ljust(5, ' ') + t[10].ljust(16, ' ') + t[11] 293 | 294 | for e in list1: 295 | backbone.write(e + "\n") 296 | backbone.close() 297 | 298 | if __name__ == "__main__": 299 | CB_whether_exist_all = [] 300 | #提取坐标信息计算windows_distance 301 | # time_statics = np.zeros((100, 4)) 302 | count = 0 303 | # book = xlwt.Workbook(encoding="utf-8", style_compression=0) 304 | # sheet = book.add_sheet('time_statics', cell_overwrite_ok=True) 305 | # for iter in range(100): 306 | start1 = time.time() 307 | # for file_name in os.listdir(path_CA): 308 | f = open('D:\\backbone_prediction\\CA_infoglp\\orign.txt', 'r') 309 | file = f.readlines() 310 | seqs = [seq.split()[0] for seq in file] 311 | # filename = [seq.split()[0] for seq in file] 312 | for i, seq in enumerate(seqs): 313 | try: 314 | # file_name = filename[i] + '.pdb' 315 | file_name = '4avz.pdb' 316 | atoms_info, ground_true_coos = atoms_infos(file_name) 317 | coord_array, acid_array, CB_whether_exist = extract_coord(atoms_info) 318 | # CB_whether_exist_all.append(CB_whether_exist) 319 | 320 | distance_window(coord_array[:223], seq, str(i)) 321 | test_dataset = DistanceWindow( 322 | distance_window_path=distance_window_path) 323 | data_loader = DataLoader(dataset=test_dataset) 324 | except Exception as e: 325 | print(e) 326 | 327 | end1 = time.time() 328 | # 融合50个模型的角度 329 | 330 | models_path = 'D:\\backbone_prediction\\top_models' 331 | 332 | total_acid = 0 333 | 334 
| with torch.no_grad(): 335 | 336 | models = [] 337 | start2 = time.time() 338 | for model_name in os.listdir(models_path): 339 | model = torch.load(os.path.join(models_path, model_name), map_location='cuda:0') 340 | model.eval() 341 | model.is_training = False 342 | models.append(model) 343 | end2 = time.time() 344 | 345 | start3 = time.time() 346 | for arrays, torsions, output_filename in data_loader: 347 | total_file = 0 348 | 349 | for model in models: 350 | arrays = arrays.to(device) 351 | pred_sincos = model(arrays[0]).squeeze(1).transpose(0, 1) 352 | output = pred_sincos.data.cpu().numpy() 353 | total_file += output 354 | total_file = total_file / 50 355 | 356 | # 根据预测角度复原坐标 357 | start4 = time.time() 358 | filename = output_filename[0] 359 | coos = [] 360 | 361 | # 读入CA数据 362 | atoms_info, ground_true_coos_real = atoms_infos(filename.replace('npy', 'pdb')) 363 | coord_array, acid_array, CB_whether_exist = extract_coord(atoms_info) 364 | for coo in coord_array: 365 | coos.append(Coordinate(coo)) 366 | 367 | PATH_OUTPUT = path_CA.replace('CA_info', 'backbone') 368 | pathlib.Path(PATH_OUTPUT).mkdir(parents=True, exist_ok=True) 369 | 370 | pred = total_file 371 | torsions_C = np.arctan2(pred[0], pred[1]) 372 | torsions_N = np.arctan2(pred[2], pred[3]) 373 | # 复原骨架结构 374 | backbone_pred_without_CB = backbone_rebuild_separated_torsion(coos, torsions_C, torsions_N) 375 | 376 | ground_true_coos = np.zeros((3, 3)) 377 | # print(backbone_pred_without_CB[1],backbone_pred_without_CB[-2]) 378 | if (ground_true_coos[0] == np.zeros((1, 3))).all(): 379 | ground_true_coos[0] = next_coord(coord_array[1], backbone_pred_without_CB[1], coord_array[0], 380 | 1.45801, 2.124, 2.7) 381 | 382 | if (ground_true_coos[1] == np.zeros((1, 3))).all(): 383 | ground_true_coos[1] = next_coord(coord_array[-2], backbone_pred_without_CB[-2], coord_array[-1], 384 | 1.52326, 1.941, -1.4) 385 | 386 | if (ground_true_coos[2] == np.zeros((1, 3))).all(): 387 | ground_true_coos[2] = 
next_coord(coord_array[-2], backbone_pred_without_CB[-2], coord_array[-1], 388 | 2.408748478225743, 1.4915450962173677, -1.4) 389 | 390 | # loss[iter][count] = np.array([np.linalg.norm(ground_true_coos[i] - ground_true_coos_real[i]) for i in range(3)]) 391 | 392 | pred_npy_without_CB = np.concatenate((ground_true_coos[0].reshape([1, 3]), 393 | backbone_pred_without_CB, ground_true_coos[-2:]), axis=0).astype( 394 | 'float32') 395 | 396 | CB_whether_exist = CB_whether_exist_all[count] 397 | count += 1 398 | pred_array = add_pred_CB(pred_npy_without_CB, CB_whether_exist) 399 | 400 | backbone_path = os.path.join(PATH_OUTPUT, filename.replace('npy', 'pdb')) 401 | recovery_infos(pred_array, atoms_info, backbone_path) 402 | end4 = time.time() 403 | time_total = end4 - start4 404 | end3 = time.time() 405 | # time_statics[iter] = np.array([end1-start1, end2-start2, end3-start3-time_total, time_total]) 406 | print(end1-start1, end2-start2, end3-start3-time_total, time_total) 407 | 408 | # 将loss写入excel 409 | # loss_mean = np.mean(loss, axis=0) 410 | # for i in range(loss_mean.shape[0]): 411 | # for j in range(loss_mean.shape[1]): 412 | # sheet.write(i+1,j+1,loss_mean[i][j]) 413 | # book.save('D://backbone_prediction//NC_random1.xls') n 414 | # print(time_statics) 415 | # for i in range(100): 416 | # sheet.write(i + 1, 0, i+1) 417 | # for j in range(4): 418 | # sheet.write(i+1, j+1, time_statics[i][j]) 419 | # book.save('D://backbone_prediction//time_stattics_cpu.xls') 420 | -------------------------------------------------------------------------------- /transform.py: -------------------------------------------------------------------------------- 1 | import math 2 | import os 3 | import time 4 | import numpy as np 5 | import tensorflow as tf 6 | import matplotlib.pyplot as plt 7 | import pathlib 8 | #from scipy.stats import norm as nm 9 | from multiprocessing import Pool 10 | import argparse 11 | import matplotlib.image 12 | # from .arraylize import Arraylize 13 | 14 | 15 | 
def _str2bool(value):
    """Parse a command-line boolean.

    ``type=bool`` treats every non-empty string (including 'False') as
    True, so the boolean switches could never be turned off.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('boolean value expected, got %r' % (value,))


parser = argparse.ArgumentParser(description='manual to this script')
parser.add_argument('--resolution', type=int, default=256,
                    help='output resolution')
parser.add_argument('--dataset_path', type=str, default=os.getcwd(),
                    help='path of dataset')
parser.add_argument('--output_path', type=str, default=os.getcwd()+'/processed_data',
                    help='path of output')
parser.add_argument('--dataset', type=str, default='cif_filtered',
                    help='name of dataset folder, bc-30-1_CA|bc-30-1_chains|cif_filtered')
parser.add_argument('--input_type', type=str, default='cif',
                    help='type of input file, cif|pdb')
parser.add_argument('--output_type', type=str, default='image',
                    help='image or distance_map, images|distance_map')
parser.add_argument('--axis_range', type=int, default=64,
                    help='map range of structures, 42|64')
parser.add_argument('--multi_process', type=_str2bool, default=True,
                    help='multi process or not')
parser.add_argument('--multi_atom', type=_str2bool, default=False,
                    help='input all backbone atoms or CA only')
parser.add_argument('--move2center', type=_str2bool, default=True,
                    help='relocate the center of proteins to the center of coordinate system')
parser.add_argument('--redistribute', type=_str2bool, default=False,
                    help='redistribute the original distribution according to normal distribution')
parser.add_argument('--relative_number', type=_str2bool, default=False,
                    help='mark dots with relative serial number')
parser.add_argument('--draw_connection', type=_str2bool, default=True,
                    help='draw dots connection or not')
parser.add_argument('--aminoacid_message', type=_str2bool, default=True,
                    help='mark amino acid with hydropathicity, bulkiness and flexibility or 1.')
parser.add_argument('--redistribute_rate', type=float, default=1.4,
                    help='coefficient of redistribution amplitude')
args = parser.parse_args()

res = args.resolution
ar = args.axis_range
s = ar / res  # scale=axis_range/resolution
input_folder = args.dataset_path + '/' + args.dataset
AMINO_ACIDS = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS',
               'GLN', 'GLU', 'GLY', 'HIS', 'ILE',
               'LEU', 'LYS', 'MET', 'PHE', 'PRO',
               'SER', 'THR', 'TRP', 'TYR', 'VAL']
AA_HYDROPATHICITY_INDEX = {
    'ARG': -4.5, 'LYS': -3.9, 'ASN': -3.5, 'ASP': -3.5, 'GLN': -3.5,
    'GLU': -3.5, 'HIS': -3.2, 'PRO': -1.6, 'TYR': -1.3, 'TRP': -0.9,
    'SER': -0.8, 'THR': -0.7, 'GLY': -0.4, 'ALA': 1.8, 'MET': 1.9,
    'CYS': 2.5, 'PHE': 2.8, 'LEU': 3.8, 'VAL': 4.2, 'ILE': 4.5,
}
AA_BULKINESS_INDEX = {
    'ARG': 14.28, 'LYS': 15.71, 'ASN': 12.82, 'ASP': 11.68, 'GLN': 14.45,
    'GLU': 13.57, 'HIS': 13.69, 'PRO': 17.43, 'TYR': 18.03, 'TRP': 21.67,
    'SER': 9.47, 'THR': 15.77, 'GLY': 3.4, 'ALA': 11.5, 'MET': 16.25,
    'CYS': 13.46, 'PHE': 19.8, 'LEU': 21.4, 'VAL': 21.57, 'ILE': 21.4,
}
AA_FLEXIBILITY_INDEX = {
    'ARG': 2.6, 'LYS': 1.9, 'ASN': 14., 'ASP': 12., 'GLN': 4.8,
    'GLU': 5.4, 'HIS': 4., 'PRO': 0.05, 'TYR': 0.05, 'TRP': 0.05,
    'SER': 19., 'THR': 9.3, 'GLY': 23., 'ALA': 14., 'MET': 0.05,
    'CYS': 0.05, 'PHE': 7.5, 'LEU': 5.1, 'VAL': 2.6, 'ILE': 1.6,
}
# Per-residue channel values: three scaled indices, or the single marker 1.
AMINO_ACID_NUMBERS = {}
if args.aminoacid_message:
    for aa in AMINO_ACIDS:
        AMINO_ACID_NUMBERS.update({aa: [(5.5-AA_HYDROPATHICITY_INDEX[aa]) / 10 * 255.,
                                        AA_BULKINESS_INDEX[aa] / 21.67 * 255.,
                                        (25.-AA_FLEXIBILITY_INDEX[aa]) / 25. * 255.]})
else:
    for aa in AMINO_ACIDS:
        AMINO_ACID_NUMBERS.update({aa: [1.]})
ary_dim = 2 + len(AMINO_ACID_NUMBERS[AMINO_ACIDS[0]])
class Atom(object):
    """One atom record: residue name, residue index, position, name, element."""

    def __init__(self, aminoacid, index, x, y, z, atom_type='CA', element='C'):
        self.index = int(index)
        self.aa = aminoacid
        self.x = float(x)
        self.y = float(y)
        self.z = float(z)
        self.type = atom_type
        self.element = element


def readfile(filename, path):
    """Return the lines of ``path/filename`` for .cif / .pdb files, else None."""
    if os.path.splitext(filename)[1] not in ('.cif', '.pdb'):
        return None
    # ``with`` closes the handle; the early return used to leave it open.
    with open(path + '/' + filename, 'r') as file:
        return file.readlines()


def extract_cif(cif_message):
    """Return the N, CA and C atoms found in mmCIF ``ATOM`` rows.

    Rows with fewer than 13 whitespace-separated fields (headers, loop
    declarations) are skipped instead of raising IndexError.
    """
    atoms = []
    for line in cif_message:
        fields = line.split()
        if len(fields) > 12 and fields[0] == 'ATOM' and fields[3] in ('CA', 'C', 'N'):
            atoms.append(Atom(fields[5], fields[8], fields[10],
                              fields[11], fields[12], fields[3], fields[2]))
    return atoms


def extract_ca_cif(cif_message):
    """Return only the CA atoms found in mmCIF ``ATOM`` rows."""
    atoms = []
    for line in cif_message:
        fields = line.split()
        if len(fields) > 12 and fields[0] == 'ATOM' and fields[3] == 'CA':
            atoms.append(Atom(fields[5], fields[8], fields[10], fields[11], fields[12]))
    return atoms


def extract_pdb(pdb_message):
    """Return the N, CA and C atoms found in PDB ``ATOM`` records.

    The residue number is read from columns 23-26.  The atom-name slice was
    passed as the index before, so ``int('CA ')`` raised on every record.
    The atom name is stripped so it compares equal to 'CA' in find_head.
    """
    atoms = []
    for line in pdb_message:
        if line.startswith('ATOM') and line[13:15] in ('N ', 'CA', 'C '):
            atom_name = line[12:16].strip()
            # Fall back to the first letter when the element columns are missing.
            element = line[76:78].strip() or atom_name[:1]
            atoms.append(Atom(line[17:20], line[22:26], line[30:38],
                              line[38:46], line[46:54], atom_name, element))
    return atoms


def extract_ca_pdb(pdb_message):
    """Return only the CA atoms found in PDB ``ATOM`` records."""
    atoms = []
    for line in pdb_message:
        if line.startswith('ATOM') and line[13:15] == 'CA':
            atoms.append(Atom(line[17:20], line[22:26], line[30:38], line[38:46], line[46:54]))
    return atoms


def extract_message(message, message_type):
    """Dispatch to the pdb / cif parser selected by ``args.multi_atom``.

    Returns None for any other ``message_type``.
    """
    if message_type == 'pdb':
        return extract_pdb(message) if args.multi_atom else extract_ca_pdb(message)
    if message_type == 'cif':
        return extract_cif(message) if args.multi_atom else extract_ca_cif(message)
    return None


def find_head(atoms):
    """Return the first CA atom, or None when there is none."""
    for atom in atoms:
        if atom.type == 'CA':
            return atom
    return None


def find_tail(atoms):
    """Return the last CA atom, or None when there is none."""
    for atom in reversed(atoms):
        if atom.type == 'CA':
            return atom
    return None


def rotation_axis(head):
    """Return the two opposite candidate rotation axes for ``head``.

    Uses the module-level ``res``, ``ar`` and ``s``.
    NOTE(review): divides by (x - y); undefined when head.x == head.y.
    """
    x = head.x
    y = head.y
    z = head.z
    root = (x ** 2 + y ** 2 + z ** 2 - 2 * s ** 2) ** 0.5
    p = y * res * root / ar - z
    q = x * res * root / ar - z
    c = ((y - x) ** 2 / (p ** 2 + q ** 2 + (y - x) ** 2)) ** 0.5
    a = p / (x - y) * c
    b = q / (y - x) * c
    return [(a, b, c), (-a, -b, -c)]  # rotation axis


def rotation_angle(head):
    """Return the rotation angle (radians) for ``head``; uses module-level ``s``."""
    x = head.x
    y = head.y
    z = head.z
    return math.acos(
        ((x + y) * s + z * (x ** 2 + y ** 2 + z ** 2 - 2 * s ** 2) ** 0.5) /
        (x ** 2 + y ** 2 + z ** 2)
    )  # rotation angle


def rotation(u, v, w, t, axis):  # original coordinates
    """Rotate the point (u, v, w) by angle ``t`` about the unit ``axis``."""
    (a, b, c) = axis
    cos_t = math.cos(t)
    sin_t = math.sin(t)
    # Rodrigues' rotation formula:
    rx = u*cos_t+(b*w-c*v)*sin_t+a*(a*u+b*v+c*w)*(1-cos_t)
    ry = v*cos_t+(c*u-a*w)*sin_t+b*(a*u+b*v+c*w)*(1-cos_t)
    rz = w*cos_t+(a*v-b*u)*sin_t+c*(a*u+b*v+c*w)*(1-cos_t)
    return rx, ry, rz  # rotated coordinates
abs(atom_v[0][1] - s) < abs(atom_v[1][0] - s) + abs(atom_v[1][1] - s): 265 | for atom in atoms: 266 | (atom.x, atom.y, atom.z) = rotation(atom.x, atom.y, atom.z, t, vs[0]) 267 | else: 268 | for atom in atoms: 269 | (atom.x, atom.y, atom.z) = rotation(atom.x, atom.y, atom.z, t, vs[1]) 270 | return atoms 271 | 272 | 273 | def move2center(atoms): 274 | coordinates = [] 275 | for atom in atoms: 276 | if atom.type == 'CA': 277 | coordinates.append([atom.x, atom.y, atom.z]) 278 | coordinates = np.array(coordinates) 279 | center = tf.Variable(tf.zeros([1, 3])) 280 | distances = coordinates-center 281 | loss = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(distances), 1))) 282 | optimizer = tf.train.GradientDescentOptimizer(0.5) 283 | train = optimizer.minimize(loss) 284 | init = tf.global_variables_initializer() 285 | sess = tf.Session() 286 | sess.run(init) 287 | losses = [] 288 | for step in range(10): 289 | sess.run(train) 290 | losses.append(sess.run(loss)) 291 | while losses[-1] != losses[-5]: 292 | sess.run(train) 293 | losses.append(sess.run(loss)) 294 | final_center = sess.run(center)[0] 295 | for atom in atoms: 296 | atom.x -= final_center[0] 297 | atom.y -= final_center[1] 298 | atom.z -= final_center[2] 299 | tf.reset_default_graph() 300 | return atoms 301 | 302 | 303 | def sign(x): 304 | if x < 0: 305 | return -1 306 | else: 307 | return 1 308 | 309 | 310 | def close_neibor(array, x_ary, y_ary, dot, dis_x, dis_y, rec): 311 | x_step = sign(dis_x) 312 | y_step = sign(dis_y) 313 | if abs(dis_x) < abs(dis_y): 314 | neibors = [(0, y_step), (x_step, 0), (x_step, y_step), (-x_step, 0), 315 | (0, -y_step), (-x_step, y_step), (x_step, -y_step), (-x_step, -y_step)] 316 | else: 317 | neibors = [(x_step, 0), (0, y_step), (x_step, y_step), (0, -y_step), 318 | (-x_step, 0), (x_step, -y_step), (-x_step, y_step), (-x_step, -y_step)] 319 | step = 1 320 | while True: 321 | for (i, j) in neibors: 322 | try: 323 | if array[x_ary + i * step, y_ary + j * step, 2] == 0: 324 | 
array[x_ary + i * step, y_ary + j * step] = [dot.z, dot.index] + AMINO_ACID_NUMBERS.get(dot.aa) 325 | rec.update({(x_ary + i * step, y_ary + j * step): dot}) 326 | # print('dot%d:%d,%d->%d,%d'%(dot[6],x,y,x+i*step,y+j*step)) 327 | return array 328 | except IndexError: 329 | print('dot(%d+%d,%d+%d) is out of the edge' % (x_ary, i * step, y_ary, j * step)) 330 | # print('%d step neibor of dot%d(%d,%d) is full!'%(step,dot_i,x,y)) 331 | step += 1 332 | 333 | 334 | def lattice_battle(array, x_ary, y_ary, dot1, dot2, rec): # dot1 is original; dot2 is new 335 | dis1_x = dot1.x / (2 * s) % 1 - 0.5 336 | dis1_y = dot1.y / (2 * s) % 1 - 0.5 337 | dis2_x = dot2.x / (2 * s) % 1 - 0.5 338 | dis2_y = dot2.y / (2 * s) % 1 - 0.5 339 | if dis1_x ** 2 + dis1_y ** 2 > dis2_x ** 2 + dis2_y ** 2: 340 | # print('%d / %d swap!'%(dot1[6],dot2[6])) 341 | array = close_neibor(array, x_ary, y_ary, dot1, dis1_x, dis1_y, rec) 342 | array[x_ary, y_ary] = [dot2.z, dot2.index] + AMINO_ACID_NUMBERS[dot2.aa] 343 | rec.update({(x_ary, y_ary) : dot2}) 344 | else: 345 | array = close_neibor(array, x_ary, y_ary, dot2, dis2_x, dis2_y, rec) 346 | return array 347 | 348 | 349 | def draw_atom(x, y, dot, array, rec): 350 | if array[x, y, -1] == 0: 351 | array[x, y] = [dot.z, dot.index] + AMINO_ACID_NUMBERS[dot.aa] 352 | rec.update({(x, y): dot}) 353 | 354 | 355 | def arraylize(atoms, array_dim): 356 | array = np.zeros([res, res, array_dim], dtype=float, order='C') 357 | rec = {} # atoms record 358 | for atom in atoms: 359 | x_ary = int((atom.x + ar) // (2 * s)) 360 | y_ary = int((atom.y + ar) // (2 * s)) 361 | if rec.get((x_ary, y_ary)): 362 | array = lattice_battle(array, x_ary, y_ary, rec[(x_ary, y_ary)], atom, rec) 363 | else: 364 | draw_atom(x_ary, y_ary, atom, array, rec) 365 | return array, rec 366 | 367 | 368 | # def values_sta(path): 369 | # xs = [] 370 | # ys = [] 371 | # for filename in os.listdir(path): 372 | # atoms = move2center(relocate(extract_cif(readfile(filename, path)))) 373 | # for atom 
in atoms: 374 | # xs.append(atom.x) 375 | # ys.append(atom.y) 376 | # return xs, ys 377 | 378 | 379 | def normal_dis(values, var, coefficient): 380 | dis = [] 381 | values.sort() 382 | mark = 0 383 | idx = 0 384 | for i in range(res): 385 | cut_point = nm.ppf((i + 1) / res, 0, var**0.5 * coefficient) 386 | if idx == len(values): 387 | dis.append([]) 388 | mark = int(idx) 389 | else: 390 | while values[idx] < cut_point: 391 | idx += 1 392 | if idx == len(values): 393 | dis.append(values[mark:idx]) 394 | mark = int(idx) 395 | break 396 | else: 397 | dis.append(values[mark:idx]) 398 | mark = int(idx) 399 | return dis 400 | 401 | 402 | # def redistribute(): 403 | 404 | 405 | def visual_values_dis(values): 406 | mark = 0 407 | idx = 0 408 | dis = [] 409 | dis_count = [] 410 | axis_length = 2*ar 411 | for i in range(1, res+1): 412 | cut_point = (i-res/2)*axis_length/res 413 | if idx == len(values): 414 | dis.append([]) 415 | else: 416 | while values[idx] < cut_point: 417 | idx += 1 418 | if idx == len(values): 419 | dis.append(values[mark:idx]) 420 | break 421 | else: 422 | dis.append(values[mark:idx]) 423 | mark = int(idx) 424 | for i in range(res): 425 | dis_count.append(len(dis[i])) 426 | plt.bar(range(res), dis_count) 427 | plt.show() 428 | 429 | 430 | def vis_normal_dis(values, var, coefficient): 431 | dis = [] 432 | values.sort() 433 | mark = 0 434 | idx = 0 435 | dis_count = [] 436 | for i in range(res): 437 | cut_point = nm.ppf((i+1)/res, 0, var**0.5*coefficient) 438 | if idx == len(values): 439 | dis.append([]) 440 | mark = int(idx) 441 | else: 442 | while values[idx] < cut_point: 443 | idx += 1 444 | if idx == len(values): 445 | dis.append(values[mark:idx]) 446 | mark = int(idx) 447 | break 448 | else: 449 | dis.append(values[mark:idx]) 450 | mark = int(idx) 451 | dis_count.append(len(dis[i])) 452 | plt.bar(range(res), dis_count) 453 | plt.show() 454 | 455 | 456 | def draw_dot(x, y, dot1, z_add, idx_add, array): 457 | if array[x, y, 2] == 0: 458 | array[x, y] = 
[dot1.z + z_add, dot1.index + idx_add, 0, 0, 0] 459 | 460 | 461 | def dots_connection(dot1, dot2, array, site): 462 | x = site[dot1][0] 463 | y = site[dot1][1] 464 | z_s = dot2.z - dot1.z 465 | x_r = sign(site[dot2][0] - x) 466 | y_r = sign(site[dot2][1] - y) 467 | x_s = abs(site[dot2][0] - x) 468 | y_s = abs(site[dot2][1] - y) 469 | dis_c = max(x_s, y_s)+1 470 | if x_s + y_s > 2: 471 | for i in range(max(x_s, y_s)): 472 | l = i + 1 473 | if min(x_s, y_s) <= 1: 474 | if x_s > y_s: 475 | draw_dot(x + l*x_r, y, dot1, z_s*l/dis_c, l/dis_c, array) 476 | else: 477 | draw_dot(x, y + l*y_r, dot1, z_s*l/dis_c, l/dis_c, array) 478 | else: 479 | t = max(x_s, y_s) // min(x_s, y_s) 480 | remainder = max(x_s, y_s) % min(x_s, y_s) 481 | if x_s > y_s: 482 | j = [l, i//t, l, y_s] 483 | else: 484 | j = [i//t, l, x_s, l] 485 | if i < max(x_s, y_s) - remainder: 486 | draw_dot(x + j[0] * x_r, y + j[1] * y_r, dot1, z_s*l/dis_c, l/dis_c, array) 487 | else: 488 | draw_dot(x + j[2] * x_r, y + j[3] * y_r, dot1, z_s*l/dis_c, l/dis_c, array) 489 | 490 | 491 | def draw_connection(atoms, array, rec): 492 | site = {} 493 | for (x, y) in rec.keys(): 494 | site.update({rec[(x, y)]: [x, y]}) 495 | for i in range(len(atoms) - 1): 496 | dots_connection(atoms[i], atoms[i + 1], array, site) 497 | 498 | 499 | def write_log(path): 500 | arg_name_list = ['dataset', 'resolution', 'input_type', 'output_type', 'axis_range', 'multi_atom', 501 | 'move2center', 'redistribute', 'redistribute_rate', 'relative_number', 'draw_connection', 502 | 'aminoacid_message'] 503 | arg_list = [args.dataset, args.resolution, args.input_type, args.output_type, args.axis_range, args.multi_atom, 504 | args.move2center, args.redistribute, args.redistribute_rate, args.relative_number, args.draw_connection, 505 | args.aminoacid_message] 506 | write_list = [time.strftime("%Y%m%d_%H%M", time.localtime())] 507 | for i in range(len(arg_name_list)): 508 | print("%s = %s" % (arg_name_list[i], str(arg_list[i]))) 509 | 
write_list.append("%s = %s" % (arg_name_list[i], str(arg_list[i]))) 510 | write_list.append('\n\n\n') 511 | with open(path + '/args_log.txt', 'a') as log_writer: 512 | log_writer.write('\n'.join(write_list)) 513 | 514 | 515 | def process(): 516 | log_dir = args.output_path + '/' + args.dataset 517 | output_dir = args.output_path + '/' + args.dataset + '/' + time.strftime("%Y%m%d_%H%M", time.localtime()) 518 | pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True) 519 | write_log(log_dir) 520 | num = 0 521 | if args.output_type == 'image': 522 | if args.redistribute: 523 | atoms_dic = {} 524 | xs = [] 525 | ys = [] 526 | for filename in os.listdir(input_folder): 527 | atoms = relocate(extract_message(readfile(filename, input_folder), args.input_type)) 528 | if args.move2center: 529 | atoms = move2center(atoms) 530 | for atom in atoms: 531 | xs.append(atom.x) 532 | ys.append(atom.y) 533 | atoms_dic.update({filename: atoms}) 534 | # var_sta = max(np.var(xs), np.var(ys)) 535 | else: 536 | for filename in os.listdir(input_folder): 537 | atoms = relocate(extract_message(readfile(filename, input_folder), args.input_type)) 538 | if args.move2center: 539 | atoms = move2center(atoms) 540 | for i in range(len(atoms)): 541 | atoms[i].z=(atoms[i].z+64.)*2.-2. 
542 | if args.draw_connection: 543 | array, rec = arraylize(atoms, ary_dim) 544 | draw_connection(atoms, array, rec) 545 | else: 546 | array, _ = arraylize(atoms, ary_dim) 547 | if args.relative_number: 548 | array[:, :, 1] /= (len(atoms) + 1) 549 | output_name = filename.replace('.cif', '.npy') 550 | 551 | np.save(output_dir + '/' + output_name, array) 552 | # break 553 | # matplotlib.image.imsave(output_dir + '/' + output_name.replace('.npy', '.png'), array) 554 | # num += 1 555 | # if num == 10: 556 | # break 557 | elif args.output_type == 'distance_map': 558 | if args.multi_atom: 559 | for filename in os.listdir(input_folder): 560 | atoms = extract_message(readfile(filename, input_folder), args.input_type) 561 | 562 | 563 | if __name__ == '__main__': 564 | print('Parent process %s.' % os.getpid()) 565 | p = Pool(3) 566 | p.apply_async(process()) 567 | print('Waiting for all subprocesses done...') 568 | p.close() 569 | p.join() 570 | print('All subprocesses done.') 571 | 572 | -------------------------------------------------------------------------------- /angle_computation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import math\n", 11 | "import os \n", 12 | "import random\n", 13 | "import argparse\n", 14 | "from scipy.spatial.distance import pdist\n", 15 | "from scipy.spatial.distance import squareform\n", 16 | "from scipy.spatial.distance import cdist\n", 17 | "\n", 18 | "parser = argparse.ArgumentParser()\n", 19 | "parser.add_argument(\"--verbosity\", help=\"increase output verbosity\")\n", 20 | "parser.add_argument('--input_type', type=str, default='pn',\n", 21 | " help='type of input file, cif|pdb')\n", 22 | "args = parser.parse_args(args=[])\n", 23 | "\n", 24 | "\n", 25 | "#读取Protein-Net坐标\n", 26 | "def atom_net(coord,atom_id):\n", 27 | " atom = 
np.array([float(coord[0].split()[atom_id])] + [float(coord[1].split()[atom_id])]+ [float(coord[2].split()[atom_id])])/100\n", 28 | " return atom\n", 29 | "\n", 30 | "\n", 31 | "#读取cif坐标\n", 32 | "def atom_cif(atoms, id_):\n", 33 | " atom = np.array([float(atoms[id_].split()[10])] + [float(atoms[id_].split()[11])] + [float(atoms[id_].split()[12])])\n", 34 | " return atom\n", 35 | "\n", 36 | "\n", 37 | "#根据两点求单位向量\n", 38 | "def vector_unit(vector_1,vector_2):\n", 39 | " bond_vector_2 = vector_1 - vector_2 \n", 40 | " bond_length_2 = np.linalg.norm(bond_vector_2)\n", 41 | " return bond_vector_2 / bond_length_2\n", 42 | "\n", 43 | "\n", 44 | "#计算标准法向量 \n", 45 | "def normal_vector_(B, C, D):\n", 46 | " U_1 = vector_unit(C,B) ; U = vector_unit(D,B) \n", 47 | " N = np.cross(U_1,U) / np.linalg.norm(np.cross(U_1,U)) \n", 48 | " return N \n", 49 | "\n", 50 | "\n", 51 | "#根据矩阵坐标求距离矩阵 \n", 52 | "def contact_martix(A):\n", 53 | " # A是一个向量矩阵:euclidean代表欧式距离\n", 54 | " distA=pdist(A,metric='euclidean')\n", 55 | " # 将distA数组变成一个矩阵\n", 56 | " distB = squareform(distA)\n", 57 | " return distB\n", 58 | "\n", 59 | "\n", 60 | "#计算旋转角和坐标转换权重\n", 61 | "def torsion_angle(A, B, C, D):\n", 62 | " #计算法向量\n", 63 | " U_2 = vector_unit(B,A) ; U_1 = vector_unit(C,B) ; U = vector_unit(D,C) \n", 64 | " N = np.cross(U_1,U) / np.linalg.norm(np.cross(U_1,U)) \n", 65 | " N_1 = np.cross(U_2,U_1) / np.linalg.norm(np.cross(U_2,U_1))\n", 66 | " m_weight = np.array([U_1 , np.cross(N_1,U_1) , N_1]) \n", 67 | " #torsion_angle\n", 68 | " angle = np.sign(np.dot(U_2,N)) * math.acos(np.dot(N_1,N))#+np.random.normal(loc=0.0, scale=1, size=None)*math.pi/18\n", 69 | " return angle, m_weight\n", 70 | "\n", 71 | "\n", 72 | "#根据真实角度或训练角度预测下一个坐标\n", 73 | "def next_coord(A, B, C, D, R, angle_confirm, angle_train):\n", 74 | " #torsion_angle\n", 75 | " angle_real , m = torsion_angle(A, B, C, D)\n", 76 | " #将真实角度或预测角度赋值给torsion\n", 77 | " torsion = angle_real\n", 78 | "# print(\"N——angle:\",angle_real,angle_train)\n", 79 | 
" angle_martix=[math.cos(math.pi-angle_confirm),\n", 80 | " math.sin(math.pi-angle_confirm) * math.cos(torsion),\n", 81 | " math.sin(math.pi-angle_confirm) * math.sin(torsion)]\n", 82 | " #计算下一个坐标\n", 83 | " next_corrd = C + R * np.dot(m.T, angle_martix)\n", 84 | " return next_corrd, torsion\n", 85 | "\n", 86 | "\n", 87 | "#计算旋转角和坐标转换权重\n", 88 | "def torsion_angle_C(A, B, C, D):\n", 89 | " #计算法向量\n", 90 | " U_2 = vector_unit(B,A) ; U_1 = vector_unit(C,B) ; U = vector_unit(D,B) \n", 91 | " N = np.cross(U_1,U) / np.linalg.norm(np.cross(U_1,U)) \n", 92 | " N_1 = np.cross(U_2,U_1) / np.linalg.norm(np.cross(U_2,U_1))\n", 93 | " m_weight = np.array([U_1 , np.cross(N_1,U_1) , N_1]) \n", 94 | " #torsion_angle\n", 95 | " angle = np.sign(np.dot(U_2,N)) * math.acos(np.dot(N_1,N))#+np.random.normal(loc=0.0, scale=1, size=None)*math.pi/18\n", 96 | " return angle, m_weight\n", 97 | "\n", 98 | "\n", 99 | "#根据真实角度或训练角度 沿CA-CA轴旋转 预测同一平面的C\n", 100 | "def next_coord_C(A, B, C, D, R, angle_confirm , angle_train):\n", 101 | " #torsion_angle\n", 102 | " angle_real , m = torsion_angle_C(A, B, C, D)\n", 103 | " #将真实角度或预测角度赋值给torsion\n", 104 | " torsion = angle_real\n", 105 | "# print(\"C——angle:\",angle_real,angle_train)\n", 106 | " angle_martix=[math.cos(math.pi-angle_confirm),\n", 107 | " math.sin(math.pi-angle_confirm) * math.cos(torsion),\n", 108 | " math.sin(math.pi-angle_confirm) * math.sin(torsion)]\n", 109 | " #计算下一个坐标\n", 110 | " next_corrd = B + R * np.dot(m.T, angle_martix)\n", 111 | " return next_corrd, torsion\n", 112 | "\n", 113 | "\n", 114 | "#根据真实坐标计算旋转角\n", 115 | "def Cartesian_to_angle(path_file):\n", 116 | " if args.input_type == 'cif':\n", 117 | " f = open(path_file, 'r') ; lines = f.readlines() ; atoms = len(lines)\n", 118 | " else:\n", 119 | " f = open(path_file, 'r') ; f_line = f.readlines()\n", 120 | " xyz = f_line[-6:-3] ; chain = f_line[-2]\n", 121 | " atoms = chain.count('+') * 3 ; n_zero = chain[0:len(chain)//2].count('-') * 3; \n", 122 | " \n", 123 | " for i 
in range(2,atoms-1):\n", 124 | " #读取数据\n", 125 | " if args.input_type == 'cif':\n", 126 | " A = atom_cif(lines, i-2) ; B = atom_cif(lines, i-1) ; C = atom_cif(lines, i) ; D = atom_cif(lines, i+1)\n", 127 | " else:\n", 128 | " atom_id = n_zero+i \n", 129 | " A = atom_net(xyz, atom_id-2); B = atom_net(xyz, atom_id-1) ; C = atom_net(xyz, atom_id) ; D = atom_net(xyz, atom_id+1)\n", 130 | " #计算单位向量\n", 131 | " U_2 = vector_unit(B,A) ; U_1 = vector_unit(C,B) ; U = vector_unit(D,C) \n", 132 | " #计算法向量\n", 133 | " N = np.cross(U_1,U) / np.linalg.norm(np.cross(U_1,U)) ; N_1 = np.cross(U_2,U_1) / np.linalg.norm(np.cross(U_2,U_1))\n", 134 | " #计算角度\n", 135 | " angle = np.sign(np.dot(U_2,N)) * math.acos(np.dot(N_1,N))/math.pi * 180 ; print(angle)\n", 136 | " f.close() \n", 137 | " \n", 138 | " \n", 139 | "#复现05-文献 基于cif格式文件 \n", 140 | "def angle_to_Cartesian_cif(path, path_angle):\n", 141 | " f=open(path,'r') ; lines=f.readlines() ; total = 0 ; distance = 0\n", 142 | " A = atom_cif(lines, 0) ; B = atom_cif(lines, 1) ; C = atom_cif(lines, 2) \n", 143 | " #构建接触矩阵\n", 144 | " true = np.zeros(shape=(len(lines),3)) \n", 145 | " true[0] = A ; true[1] = B ; true[2] = C ;\n", 146 | " \n", 147 | " generation = np.zeros(shape=(len(lines),3))\n", 148 | " generation[0] = A ; generation[1] = B ; generation[2] = C\n", 149 | " \n", 150 | " #读取预测的角度信息\n", 151 | " if not os.path.exists(path_angle):\n", 152 | " torsion_training = 0\n", 153 | " else:\n", 154 | " torsion_training = np.load(path_angle)\n", 155 | " \n", 156 | " #计算下一个原子坐标\n", 157 | " for i in range(2,len(lines)-1):\n", 158 | " #给定键长键角\n", 159 | " if lines[i].split()[3] == 'CA':\n", 160 | " R = 1.52326 ; angle_confirm = 1.941\n", 161 | " elif lines[i].split()[3] == 'C':\n", 162 | " R = 1.32868; angle_confirm= 2.028\n", 163 | " elif lines[i].split()[3] == 'N':\n", 164 | " R = 1.45801 ; angle_confirm = 2.124 \n", 165 | " \n", 166 | " #获取当前原子的预测旋转角 \n", 167 | " torsion_train = torsion_training[i//3] \n", 168 | " \n", 169 | " D = 
atom_cif(lines, i+1) ; next_xyz, angle = next_coord(A, B, C, D, R, angle_confirm, torsion_training)\n", 170 | " true[i+1] = D ; generation[i+1] = next_xyz\n", 171 | " total += np.square(np.linalg.norm(next_xyz - D))\n", 172 | " A = B ; B = C ; C = next_xyz\n", 173 | " \n", 174 | " #根据接触矩阵计算rmsd \n", 175 | " T = contact_martix(true) ; G = contact_martix(generation)\n", 176 | " distance = np.square(np.linalg.norm(T - G))\n", 177 | " rmsd = np.sqrt(distance/(len(lines)-1)/len(lines))\n", 178 | " print(rmsd,len(lines))\n", 179 | " #由于误差会传递,所以rmsd较大\n", 180 | " \n", 181 | " #求接触矩阵的差的接触矩阵\n", 182 | "# dist=cdist(true,generation,metric='euclidean')\n", 183 | "# # print(dist)\n", 184 | "# rmsd = np.square(np.linalg.norm(dist))/len(lines)/(len(lines)-1)\n", 185 | "# print(rmsd,len(lines)) ; f.close()、\n", 186 | " f.close()\n", 187 | "\n", 188 | " \n", 189 | "#复现05-文献 基于Protein-Net\n", 190 | "def angle_to_Cartesian(path_file, path_angle):\n", 191 | " total = 0 ; distance = 0\n", 192 | " f = open(path_file, 'r') ; f_line = f.readlines()\n", 193 | " xyz = f_line[-6:-3] ; chain = f_line[-2]\n", 194 | " atoms = chain.count('+') * 3 ; n_zero = chain[0:len(chain)//2].count('-') * 3; \n", 195 | " # Constants\n", 196 | " A = atom_net(xyz, n_zero + 0) ; B = atom_net(xyz, n_zero + 1) ; C = atom_net(xyz, n_zero + 2) \n", 197 | " true = np.zeros(shape=(atoms,3)) ; true[0] = A ; true[1] = B ; true[2] = C ;\n", 198 | " generation = np.zeros(shape=(atoms,3)); generation[0] = A ; generation[1] = B ; generation[2] = C\n", 199 | " \n", 200 | " #读取预测的角度信息\n", 201 | " if not os.path.exists(path_angle):\n", 202 | " torsion_training = 0\n", 203 | " else:\n", 204 | " torsion_training = np.load(path_angle)\n", 205 | " \n", 206 | " #计算下一个原子坐标\n", 207 | " for i in range(2,atoms-1):\n", 208 | " D = atom_net(xyz, atom_id+1) \n", 209 | " atom_id = n_zero+i \n", 210 | " \n", 211 | " #给定键长键角\n", 212 | " if atom_id % 3 == 1 :\n", 213 | " R = 1.52326 ; angle_confirm = 1.941\n", 214 | " elif atom_id % 3 == 2 
:\n", 215 | " R = 1.32868; angle_confirm= 2.028\n", 216 | " elif atom_id % 3 == 0:\n", 217 | " R = 1.45801 ; angle_confirm = 2.124 \n", 218 | " \n", 219 | " #获取当前原子的预测旋转角 \n", 220 | " torsion_train = torsion_training[i//3] \n", 221 | " #预测下一个原子坐标\n", 222 | " next_xyz, angle = next_coord(A, B, C, D, R, angle_confirm, torsion_training)\n", 223 | " \n", 224 | " #构建接触矩阵\n", 225 | " true[i+1] = D \n", 226 | " generation[i+1] = next_xyz\n", 227 | " total += np.square(np.linalg.norm(next_xyz - D))\n", 228 | " A = B ; B = C ; C = next_xyz\n", 229 | " \n", 230 | " #根据接触矩阵计算rmsd \n", 231 | " T = contact_martix(true) ; G = contact_martix(generation)\n", 232 | " distance = np.square(np.linalg.norm(T - G))\n", 233 | " rmsd = np.sqrt(distance/(atoms-1)/atoms)\n", 234 | " print(rmsd,atoms)\n", 235 | " #由于误差会传递,所以rmsd较大\n", 236 | " f.close()\n", 237 | " \n", 238 | " \n", 239 | "#以CA-CA轴为旋转轴 根据旋转角预测原子坐标 (三种情况判断误差最小的情况)\n", 240 | "def angle_to_Cartesian_CA_compare(path_file,path_angle):\n", 241 | " total = 0 ; distance = 0; a=0; b=0; c=0\n", 242 | " \n", 243 | " if args.input_type == 'cif':\n", 244 | " f = open(path_file, 'r'); lines = f.readlines(); atoms = len(lines)\n", 245 | " next_N = atom_cif(lines,0)\n", 246 | " \n", 247 | " else:\n", 248 | " f = open(path_file, 'r'); f_line = f.readlines()\n", 249 | " xyz = f_line[-6:-3]; chain = f_line[-2]\n", 250 | " atoms = chain.count('+') * 3; n_zero = chain[0:len(chain)//2].count('-') * 3;\n", 251 | " \n", 252 | " #读取预测的角度信息\n", 253 | " if not os.path.exists(path_angle):\n", 254 | " torsion_training = 0\n", 255 | " else:\n", 256 | " torsion_training = np.load(path_angle)\n", 257 | " \n", 258 | " for i in range(2,atoms-4,3): \n", 259 | " if args.input_type == 'cif':\n", 260 | " A = atom_cif(lines,i-2); B = atom_cif(lines,i-1); C = atom_cif(lines,i+2)\n", 261 | " D_C = atom_cif(lines, i); D_N = atom_cif(lines, i+1); C_2 = atom_cif(lines,i+3) \n", 262 | " \n", 263 | " else:\n", 264 | " atom_id = n_zero+i \n", 265 | " A = atom_net(xyz, 
atom_id-2); B = atom_net(xyz, atom_id-1); C = atom_net(xyz, atom_id + 2) \n", 266 | " D_C = atom_net(xyz, atom_id) ; D_N = atom_net(xyz, atom_id+1); C_2 = atom_net(xyz, atom_id+3) \n", 267 | " \n", 268 | " #获取当前原子的预测旋转角 \n", 269 | " torsion_train = torsion_training[i//3] \n", 270 | " \n", 271 | " #CA_next - C沿CA - CA轴做旋转\n", 272 | " #0.21941264623804932:C-CA轴和CA-CA轴的夹角\n", 273 | " R_C = 2.4345193937977068 ; angle_confirm_C = 0.21941264623804932\n", 274 | " next_C , angle_C = next_coord(A, B, C, D_C, R_C, angle_confirm_C, torsion_training)\n", 275 | " \n", 276 | " #CA - C沿CA - CA轴做旋转\n", 277 | " #0.35529281510453287:CA-C轴和CA-CA轴的夹角\n", 278 | " R_C_2 = 1.52326 ; angle_confirm_C_2 = math.pi - 0.35529281510453287\n", 279 | " next_C_2 , angle_C_2 = next_coord_C(A, B, C, D_C, R_C_2, angle_confirm_C_2, torsion_training)\n", 280 | " \n", 281 | " #CA_next - N沿CA - CA轴做旋转\n", 282 | " #0.263502970667963:CA-N轴和CA-CA轴的夹角\n", 283 | " R_N = 1.45801 ; angle_confirm_N = 0.263502970667963\n", 284 | " next_N , angle_N = next_coord(A, B, C, D_N, R_N, angle_confirm_N, torsion_training)\n", 285 | " \n", 286 | " total_C = np.linalg.norm(next_C - D_C)\n", 287 | " total_C_2 = np.linalg.norm(next_C_2 - D_C)\n", 288 | " total_N = np.linalg.norm(next_N - D_N)\n", 289 | " a += total_C; b += total_C_2; c += total_N\n", 290 | " x=(atoms - 6) // 3 \n", 291 | " print(a/x, b/x, c/x)\n", 292 | " f.close()\n", 293 | "\n", 294 | " \n", 295 | "def angle_to_Cartesian_CA_CA(path_file,path_angle):\n", 296 | " total = 0; distance = 0; a = 0; b = 0\n", 297 | " #读取数据\n", 298 | " if args.input_type == 'cif':\n", 299 | " f = open(path_file, 'r'); lines = f.readlines(); atoms = len(lines)\n", 300 | " next_N = atom_cif(lines,0)\n", 301 | " \n", 302 | " #建造接触矩阵\n", 303 | " true = np.zeros(shape=(atoms,3)); generation = np.zeros(shape=(atoms,3))\n", 304 | " true[0] = atom_cif(lines,0); generation[0] = atom_cif(lines,0)\n", 305 | " true[-1] = atom_cif(lines,atoms-1); generation[-1] = atom_cif(lines,atoms-1)\n", 306 
| " \n", 307 | " else:\n", 308 | " f = open(path_file, 'r'); f_line = f.readlines()\n", 309 | " xyz = f_line[-6:-3]; chain = f_line[-2]\n", 310 | " atoms = chain.count('+') * 3; n_zero = chain[0:len(chain)//2].count('-') * 3;\n", 311 | " \n", 312 | " #建造接触矩阵\n", 313 | " true = np.zeros(shape=(atoms,3)); generation = np.zeros(shape=(atoms,3))\n", 314 | " true[0] = atom_net(xyz,0); generation[0] = atom_net(xyz,0)\n", 315 | " true[-1] = atom_net(xyz,atoms-1); generation[-1] = atom_net(xyz,atoms-1)\n", 316 | " \n", 317 | " #读取预测的角度信息\n", 318 | " torsion_training = np.zeros(shape=(len(torsion_sin),1))\n", 319 | " if not os.path.exists(path_angle):\n", 320 | " torsion_training = np.zeros(shape=(len(torsion_sin),1))\n", 321 | " torsion_training[0] = 'none'\n", 322 | " else:\n", 323 | " torsion_sin = np.load(path_angle)[0] \n", 324 | " torsion_cos = np.load(path_angle)[1] \n", 325 | " # torsion_training = np.load(path_angle)[2] * math.pi\n", 326 | " torsion_training = np.zeros(shape=(len(torsion_sin),1))\n", 327 | " \n", 328 | " for n in range(len(torsion_sin)):\n", 329 | " torsion_training[n] = math.atan2(torsion_sin[n],torsion_cos[n])\n", 330 | " \n", 331 | " for i in range(5,atoms-1,3):\n", 332 | " \n", 333 | " if args.input_type == 'cif':\n", 334 | " A = atom_cif(lines, i-4); B = atom_cif(lines,i-1); C = atom_cif(lines,i+2)\n", 335 | " D_C = atom_cif(lines, i); D_N = atom_cif(lines, i+1) \n", 336 | " else:\n", 337 | " atom_id = n_zero+i \n", 338 | " A = atom_net(xyz, atom_id-4); B = atom_net(xyz, atom_id-1); C = atom_net(xyz, atom_id + 2) \n", 339 | " D_C = atom_net(xyz, atom_id); D_N = atom_net(xyz, atom_id+1)\n", 340 | " \n", 341 | " if torsion_training[0] != 'none':\n", 342 | " torsion_train_N = torsion_training[i//3] #介入训练的角度\n", 343 | "\n", 344 | " if torsion_train_N >0:\n", 345 | " torsion_train_C = torsion_train_N - math.pi \n", 346 | " else:\n", 347 | " torsion_train_C = math.pi + torsion_train_N\n", 348 | " \n", 349 | " #CA - C沿CA - CA轴做旋转 \n", 350 | " 
def angle_to_Cartesian_intersection(path_file, path_angle):
    """Locate the carbonyl C as an intersection of two circles and print errors.

    For every residue two circles are set up: one centred on the CA-CA axis
    and one centred on the N-CA axis.  The line where their planes meet is
    intersected with the first circle, and the distance from the closer of
    the two intersection points to the true C atom is printed.

    Args:
        path_file: Path of the structure file; parsed as cif-style or as a
            ProteinNet-style record depending on ``args.input_type``.
        path_angle: Unused; kept so all reconstruction functions share one
            call signature.

    Returns:
        None.  Diagnostic values are printed.
    """
    # Constants copied from the original notebook.  Per its comments
    # 0.35529281510453287 is the CA-C / CA-CA axis angle and
    # 0.5298886988235514 is the radius of CA-C rotating about CA-CA.
    # NOTE(review): 1.941 is presumably a backbone bond angle in radians
    # -- confirm.
    bond_CA_C = 1.52326
    ring_radius = 0.5298886988235514
    centre_shift_1 = bond_CA_C * math.cos(0.35529281510453287)
    centre_shift_2 = bond_CA_C * math.cos(math.pi - 1.941)

    # Read everything up front so the handle is closed even if parsing fails.
    with open(path_file, 'r') as f:
        lines = f.readlines()

    if args.input_type == 'cif':
        atoms = len(lines)
        offset = 0

        def atom_at(index):
            return atom_cif(lines, index)
    else:
        xyz = lines[-6:-3]
        chain = lines[-2]
        atoms = chain.count('+') * 3
        offset = chain[0:len(chain) // 2].count('-') * 3

        def atom_at(index):
            return atom_net(xyz, index)

    for i in range(5, atoms - 1, 3):
        base = offset + i
        N = atom_at(base - 2)
        B = atom_at(base - 1)
        D_C = atom_at(base)
        C = atom_at(base + 2)

        # First plane normal and circle centre: along the CA-CA axis.
        normal_vector_1 = (C - B) / np.linalg.norm(C - B)
        next_CA_mid = B + centre_shift_1 * normal_vector_1

        # Second plane normal and circle centre: along the N-CA axis.
        # The angle between the two normals is not fixed.
        normal_vector_2 = (B - N) / np.linalg.norm(B - N)
        next_C_mid = B + centre_shift_2 * normal_vector_2

        # Direction of the line L0 in which the two planes intersect.
        cross_12 = np.cross(normal_vector_1, normal_vector_2)
        L0_dir = cross_12 / np.linalg.norm(cross_12)

        # Direction inside plane 1 that is perpendicular to L0.
        cross_01 = np.cross(L0_dir, normal_vector_1)
        L1_dir = cross_01 / np.linalg.norm(cross_01)

        # Signed distance from the first centre to plane 2, then the
        # distance from that centre to L0 measured along L1.
        D = np.dot(next_C_mid - next_CA_mid, normal_vector_2)
        t = D / np.dot(L1_dir, normal_vector_2)

        # Foot point of L1 on L0.
        R = next_CA_mid + t * L1_dir
        print(D, t)

        # Two candidate points on L0 at the ring radius from the first
        # centre.  If |t| exceeds the radius the square root is NaN, as in
        # the original.
        half_chord = np.sqrt((ring_radius ** 2) - t ** 2)
        L0 = R - half_chord * L0_dir
        L0_ = R + half_chord * L0_dir

        print(np.linalg.norm(L0 - next_CA_mid))
        # Report the candidate that lies closer to the true C atom.
        print(min(np.linalg.norm(L0 - D_C), np.linalg.norm(L0_ - D_C)))

    # NOTE(review): the collapsed source does not show whether this
    # separator was printed per residue or once per file; per file is
    # assumed here -- confirm against the notebook.
    print('--------------------------------------------------')
def change_filename(path, path_test):
    """Rename predicted ``.npy`` files after the reference files they match.

    A candidate in ``path_test`` matches a reference in ``path`` when the
    first three values of the candidate's row 2 equal the first three values
    of the reference's row 0.  The matching candidate is renamed to the
    reference's file name inside ``path_test``.

    Each candidate is loaded once, files are visited in sorted order, and a
    candidate is used for at most one reference, so two candidates with the
    same leading coordinates can no longer be renamed onto each other.

    Args:
        path: Directory holding the reference ``.npy`` files (true names).
        path_test: Directory holding the ``.npy`` files to be renamed.

    Returns:
        None.  The name of every matched reference file is printed.
    """
    # Cache the comparison slice of every candidate: one disk read per file
    # instead of one per (reference, candidate) pair.
    unmatched = {}
    for cand_name in sorted(os.listdir(path_test)):
        cand_data = np.load(os.path.join(path_test, cand_name))
        unmatched[cand_name] = np.array(cand_data[2][0:3])

    for filename in sorted(os.listdir(path)):
        reference = np.load(os.path.join(path, filename))[0][0:3]

        # array_equal is False (not an error) when the shapes differ.
        hit = next((name for name, signature in unmatched.items()
                    if np.array_equal(reference, signature)), None)
        if hit is None:
            continue

        print(filename)
        path_fake = os.path.join(path_test, hit)
        path_new = os.path.join(path_test, filename)
        if hit != filename:
            if os.path.exists(path_new):
                # os.rename would overwrite (POSIX) or raise (Windows);
                # leave both files alone rather than lose data.
                print('skipped, target already exists: ' + path_new)
                continue
            os.rename(path_fake, path_new)
        # A candidate is claimed by the first reference it matches.
        del unmatched[hit]
def traverse_file(path):
    """Ask which computation to run and apply it to every file in ``path``.

    Four yes/no questions are asked up front.  The first one answered with
    ``y`` (in the order asked) decides what is done with each file:
    torsion extraction, the literature reconstruction (dispatched on the
    ``cif`` / ``pn`` extension), the CA-CA axis reconstruction, or the
    two-circle intersection reconstruction.

    The predicted-angle file of each input is looked up in a sibling
    directory obtained by replacing ``test`` with ``torsion_validation`` in
    ``path``; it has the same file name, with a ``.pn`` suffix turned into
    ``.npy``.

    Args:
        path: Directory containing the structure files to process.

    Returns:
        None.
    """
    ask = input('是否要计算旋转角? y/n:')
    ask_repetition = input('是否采用05-文献方法计算坐标? y/n:')
    ask_CA_CA = input('是否以CA_CA为旋转轴? y/n:')
    ask_intersection = input('是否通过求两个环的交点预测原子坐标? y/n:')

    # Derive the angle directory from the directory only.  Rewriting the
    # joined path, as before, also altered file names that happen to
    # contain 'test' or '.pn', although the names are required to match.
    angle_dir = path.replace('test', 'torsion_validation')

    for name in os.listdir(path):
        path_file = os.path.join(path, name)
        extension = name.split('.')[-1]

        # Angle files are expected next to the raw data, with matching names.
        angle_name = name[:-len('.pn')] + '.npy' if name.endswith('.pn') else name
        path_angle = os.path.join(angle_dir, angle_name)

        # Run the function for the first option that was answered with 'y'.
        if ask == 'y':
            Cartesian_to_angle(path_file)

        elif ask_repetition == 'y':
            if extension == 'cif':
                angle_to_Cartesian_cif(path_file, path_angle)
            elif extension == 'pn':
                angle_to_Cartesian(path_file, path_angle)

        elif ask_CA_CA == 'y':
            angle_to_Cartesian_CA_CA(path_file, path_angle)

        elif ask_intersection == 'y':
            angle_to_Cartesian_intersection(path_file, path_angle)


# Raw string: the backslashes of the Windows path are not escape sequences.
if __name__ == "__main__":
    traverse_file(r'G:\Computational reconstruction\plam\test')
--------------------------------------------------------------------------------