├── sample_test ├── group_info.csv ├── split_sample │ └── split_barcode.csv ├── sample_infor.csv └── Samples_gene.fa ├── CMlib ├── subprocesspath.py ├── showprocess.py ├── show_fasta.ui ├── show_sampletable.ui ├── processing.ui ├── show_result.ui ├── split_fastq.py ├── bwa_run.py ├── show_sampletable.py ├── show_grouptable.py ├── show_fasta.py ├── show_barcodestable.py ├── output_aln_pdf.py ├── change_color.py ├── plotfigures.py ├── start.bak.ui ├── split_lanes.ui ├── start.ui ├── bwa.py ├── flash_merge.ui ├── plot_each_bam_filter.py ├── plot_pdf_filter.py ├── output_aln_fa_filter.py ├── Barplot_deletion_filter.py └── mut_rate_filter.py ├── readme.md ├── merge.py ├── split.py ├── start.py └── crisprmatch_running.py /sample_test/group_info.csv: -------------------------------------------------------------------------------- 1 | group,rep1,rep2,rep3,control,gene,strand,start,end 2 | AsCpf1-OsPDS-crRNA01,AsCpf1-OsPDS-TTTG-crRNA01_rep1,AsCpf1-OsPDS-TTTG-crRNA01_rep2,AsCpf1-OsPDS-TTTG-crRNA01_rep3,AsCpf1-OsPDS-TTTG-crRNA01_ck,OsPDS,+,136,162 -------------------------------------------------------------------------------- /CMlib/subprocesspath.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def subprocesspath(path): 5 | """ 6 | 7 | :param path: path 8 | :return: path, for subprocess, avoid white space error 9 | """ 10 | 11 | rpath = '\''+ os.path.abspath(path)+'\'' 12 | 13 | return rpath 14 | 15 | def testfun(): 16 | pass -------------------------------------------------------------------------------- /sample_test/split_sample/split_barcode.csv: -------------------------------------------------------------------------------- 1 | Index,Sample,Barcode_L,Barcode_R 1,TX180701,GATCAG,GTCCGC 2,TX180702,TAGCTT,GTCCGC 3,TX180703,TAATCG,ACAGTG 4,TX180704,TACAGC,GCCAAT 5,TX180705,GGCTAC,GTCCGC 6,TX180706,CTTGTA,GTCCGC 7,TX180707,GATCAG,GTGAAA 8,TX180708,TAGCTT,GTGAAA 9,TX180709,TATAAT,CAGATC 10,TX180710,TCATTC,ACTTGA 11,TX180711,GGCTAC,GTGAAA 12,TX180712,CTTGTA,GTGAAA -------------------------------------------------------------------------------- /sample_test/sample_infor.csv: -------------------------------------------------------------------------------- 1 | Index,Sample,Vector,Note,gRNA_PAM,start,end,Type,gene_name 2 | 1,TX.1,-,AsCpf1-OsPDS-TTTG-crRNA01_ck,tttgGAGTGAAATCTCTTGTCTTAAGG,136,162,CK,OsPDS 3 | 2,TX.2,pYPQ203-AsCpf1-OsPDS-crRNA01,AsCpf1-OsPDS-TTTG-crRNA01_rep1,tttgGAGTGAAATCTCTTGTCTTAAGG,136,162,Rep1,OsPDS 4 | 3,TX.3,pYPQ203-AsCpf1-OsPDS-crRNA01,AsCpf1-OsPDS-TTTG-crRNA01_rep3,tttgGAGTGAAATCTCTTGTCTTAAGG,136,162,Rep3,OsPDS 5 | 4,TX.4,pYPQ203-AsCpf1-OsPDS-crRNA01,AsCpf1-OsPDS-TTTG-crRNA01_rep2,tttgGAGTGAAATCTCTTGTCTTAAGG,136,162,Rep2,OsPDS -------------------------------------------------------------------------------- /CMlib/showprocess.py: -------------------------------------------------------------------------------- 1 | from PyQt5.QtWidgets import QHeaderView, QPushButton,QProgressDialog 2 | from PyQt5.QtCore import Qt 3 | 4 | def showbarprocess(content): 5 | num = int(100000) 6 | progress = QProgressDialog() 7 | progress.setWindowTitle("Please waiting") 8 | progress.setLabelText(content) 9 | progress.setCancelButton(None) ##不显示cancel button 10 | #progress.setCancelButtonText("") 11 | progress.setMinimumDuration(5) 12 | progress.setWindowModality(Qt.WindowModal) 13 | progress.setRange(0, num) 14 | for i in range(num): 15 | progress.setValue(i) 16 | 17 | else: 18 | progress.setValue(num) -------------------------------------------------------------------------------- /CMlib/show_fasta.ui: -------------------------------------------------------------------------------- 1 | 2 | 3 | Dialog 4 | 5 | 6 | 7 | 0 8 | 0 9 | 867 10 | 353 11 | 12 | 13 | 14 | Dialog 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | Close 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /CMlib/show_sampletable.ui: -------------------------------------------------------------------------------- 1 | 2 | 3 | Dialog 4 | 5 | 6 | 7 | 0 8 | 0 9 | 866 10 | 355 11 | 12 | 13 | 14 | Dialog 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | Close 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /CMlib/processing.ui: -------------------------------------------------------------------------------- 1 | 2 | 3 | Dialog 4 | 5 | 6 | 7 | 0 8 | 0 9 | 400 10 | 300 11 | 12 | 13 | 14 | Dialog 15 | 16 | 17 | 18 | 19 | 50 20 | 60 21 | 301 22 | 23 23 | 24 | 25 | 26 | 24 27 | 28 | 29 | 30 | 31 | 32 | 50 33 | 30 34 | 101 35 | 16 36 | 37 | 38 | 39 | Processing 40 | 41 | 42 | 43 | 44 | 45 | 50 46 | 90 47 | 101 48 | 16 49 | 50 | 51 | 52 | Details 53 | 54 | 55 | 56 | 57 | 58 | 50 59 | 120 60 | 311 61 | 151 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /CMlib/show_result.ui: -------------------------------------------------------------------------------- 1 | 2 | 3 | Dialog 4 | 5 | 6 | 7 | 0 8 | 0 9 | 866 10 | 580 11 | 12 | 13 | 14 | Dialog 15 | 16 | 17 | 18 | 19 | 20 | 21 | Arial 22 | 15 23 | 75 24 | true 25 | 26 | 27 | 28 | 1. Single sample result 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | Arial 40 | 15 41 | 75 42 | true 43 | 44 | 45 | 46 | 2. Groups result 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | Close 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /CMlib/split_fastq.py: -------------------------------------------------------------------------------- 1 | import os 2 | from PyQt5.QtWidgets import QHeaderView, QPushButton,QProgressDialog,QProgressBar,QDialog 3 | from PyQt5.QtCore import Qt,QBasicTimer 4 | 5 | def reverse_complement(dna): 6 | complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'} 7 | return ''.join([complement[base] for base in dna[::-1]]) 8 | 9 | 10 | 11 | def split_fastq(indexnow,df,fastq,output): 12 | ''' 13 | 14 | :param df: barcode csv 15 | :param fastq: split fastqfile 16 | :param output: output directory 17 | :return: 18 | ''' 19 | all_fastq = set() 20 | lev_fastq = set() 21 | barcodes = dict() 22 | barcodes_reverse = dict() 23 | 24 | 25 | sample = df.loc[indexnow]['Sample'] 26 | left_code = df.loc[indexnow]['Barcode_L'].upper() 27 | right_code_raw = df.loc[indexnow]['Barcode_R'].upper() 28 | right_code = reverse_complement(right_code_raw) 29 | left_length = len(left_code) 30 | right_length = len(right_code) 31 | left_code_reverse = reverse_complement(left_code) 32 | right_code_reverse = reverse_complement(left_code) 33 | dirname = os.path.join(output,sample) 34 | mkdircmd = ' '.join(['mkdir',dirname]) 35 | print(mkdircmd) 36 | os.system(mkdircmd) 37 | fileq = open(os.path.join(output,sample,sample + '.extendedFrags.fastq'),'w') 38 | 39 | flag = 0 40 | with open(fastq) as seqfile: 41 | for i in seqfile: 42 | inf = i.rstrip() 43 | flag += 1 44 | if flag == 1: 45 | name = inf 46 | if flag == 2: 47 | seq = inf 48 | if flag == 4: 49 | quality = inf 50 | value = name + '\n' + seq + '\n' + '+' + '\n' + quality 51 | flag = 0 52 | if seq[:left_length] == left_code and seq[-right_length:] == right_code: 53 | seq1 = seq[left_length:-right_length] 54 | quality1 = quality[left_length:-right_length] 55 | print(name,seq1,'+',quality1,sep='\n',file=fileq) 56 | if seq[:right_length] == right_code_raw and seq[-left_length:] == left_code_reverse: 57 | seq1 = seq[right_length:-left_length] 58 | quality1 = quality[right_length:-left_length] 59 | print(name,seq1,'+',quality1,sep='\n',file=fileq) 60 | fileq.close() 61 | -------------------------------------------------------------------------------- /CMlib/bwa_run.py: -------------------------------------------------------------------------------- 1 | import os 2 | from glob import glob 3 | import pandas as pd 4 | from pyfasta import Fasta 5 | from subprocess import Popen 6 | from subprocess import PIPE 7 | 8 | def prepare(infofile, refname, output, bwabin, samtoolsbin, picardbin,inputdir): 9 | """ 10 | 11 | :param infofile: a description file of details of each sample, example: sample_infor.txt 12 | :param refname: a fasta format of the sequence in the target region, exaple:Samples_gene.fa 13 | :param output: folder of temporary files 14 | :param bwabin: bwa bin path 15 | :param samtoolsbin: samtools bin bath 16 | :param picardbin: picard bin path 17 | :return: 18 | """ 19 | datainfo=pd.read_csv(infofile,index_col="Index") 20 | outputname = os.path.join(output, 'bwa_run.sh') 21 | documentdir = os.path.abspath(inputdir) 22 | #documentdir = os.path.dirname(os.path.abspath(infofile)) 23 | genomeindex = os.path.join(output, os.path.basename(refname)) 24 | 25 | outio = open(outputname,"w") 26 | for idx in datainfo.index: 27 | fqname = documentdir+'/'+datainfo.ix[idx]['Sample']+'/'+datainfo.ix[idx]['Sample']+'.extendedFrags.fastq' 28 | bamfile = output + '/' + datainfo.ix[idx]['Note'] + '.bam' 29 | bwamemcmd = ' '.join([bwabin, 'mem', genomeindex,fqname, '|', samtoolsbin, 'view','-bS','-', '|', samtoolsbin, 'sort', '-','-o', bamfile]) 30 | samtoolscmd = ' '.join([samtoolsbin, 'index',bamfile]) 31 | print(bwamemcmd) 32 | print(samtoolscmd) 33 | bwarun = Popen(bwamemcmd, stdout=PIPE, stderr=PIPE, shell=True) 34 | bwarun.communicate() 35 | samtoolsrun = Popen(samtoolscmd, stdout=PIPE, stderr=PIPE, shell=True) 36 | samtoolsrun.communicate() 37 | 38 | # print(bwabin,' mem ', os.path.basename(refname), ' ', fqname, ' | ',picardbin,' SortSam I=/dev/stdin O=', bamfile, 39 | # ' SO=coordinate', sep='', file=outio) 40 | # print(samtoolsbin,' index ',bamfile, file=outio) 41 | #print('bwa mem ', os.path.basename(refname), ' ', fqname, ' | picard SortSam I=/dev/stdin O=', bamfile, ' SO=coordinate', sep='', 42 | # file=outio) 43 | #print('samtools index ', bamfile, file=outio) 44 | # outio.close() 45 | print("bwa command load!") 46 | 47 | # ###run bwa mem 48 | # bwacmd="bash bwa_run.sh" 49 | # print(bwacmd) 50 | # runbwaalign = Popen(bwacmd, shell=True, cwd=output) 51 | # runbwaalign.communicate() 52 | 53 | print("bwa mem finished") 54 | 55 | return True -------------------------------------------------------------------------------- /CMlib/show_sampletable.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | 5 | from PyQt5 import uic,QtWidgets 6 | from PyQt5.QtGui import QStandardItemModel, QStandardItem 7 | from PyQt5.QtWidgets import QHeaderView 8 | import os 9 | 10 | path = os.getcwd() 11 | qtCreatorFile = os.path.join(path,'CMlib/show_sampletable.ui') 12 | 13 | Ui_showtable, QtBaseClass = uic.loadUiType(qtCreatorFile) 14 | 15 | class showtable(QtWidgets.QDialog, Ui_showtable): 16 | def __init__(self): 17 | QtWidgets.QDialog.__init__(self) 18 | Ui_showtable.__init__(self) 19 | self.setupUi(self) 20 | 21 | self.setWindowTitle('Sample Information Table') 22 | self.checkbtn.clicked.connect(self.sampleEdit) 23 | print("open sample infor table") 24 | 25 | def setuptable(self,pd): 26 | 27 | self.df=pd 28 | rown = len(self.df.index) 29 | coln = len(self.df.columns) 30 | self.model = QStandardItemModel(rown, 9) 31 | # labels = list(self.df.columns.values) 32 | # rown=len(self.df.index) 33 | # self.model = QStandardItemModel(rown,9) 34 | labels=['Index','Sample','Vector','Note','gRNA_PAM','start','end','Type','gene_name'] 35 | ###判断格式 36 | if list(self.df.columns.values) != labels: 37 | self.showMessageBox("warning", "wrong table!") 38 | return "wrong" 39 | else: 40 | self.model.setHorizontalHeaderLabels(labels) 41 | # self.tableView.resize(500,300) 42 | #下面代码让表格100填满窗口 43 | self.tableView.horizontalHeader().setStretchLastSection(True) 44 | self.tableView.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch) 45 | 46 | for row in range(rown): 47 | #print(self.df.loc[row].Sample) 48 | for column in range(9): 49 | item = QStandardItem(str(self.df.loc[row][labels[column]])) 50 | self.model.setItem(row, column, item) 51 | 52 | self.tableView.setModel(self.model) 53 | 54 | return "yes" 55 | 56 | def sampleEdit(self): 57 | self.close() ## 关闭窗口 58 | 59 | # ############## warning message ######### 60 | def showMessageBox(self, title, message): 61 | msgBox = QtWidgets.QMessageBox() 62 | msgBox.setIcon(QtWidgets.QMessageBox.Warning) 63 | msgBox.setWindowTitle(title) 64 | msgBox.setText(message) 65 | msgBox.setStandardButtons(QtWidgets.QMessageBox.Ok) 66 | msgBox.exec_() 67 | ################################################## 68 | 69 | 70 | 71 | 72 | if __name__ == "__main__": 73 | app = QtWidgets.QApplication(sys.argv) 74 | window = showtable() 75 | window.show() 76 | sys.exit(app.exec_()) -------------------------------------------------------------------------------- /CMlib/show_grouptable.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | 5 | from PyQt5 import uic,QtWidgets 6 | from PyQt5.QtGui import QStandardItemModel, QStandardItem 7 | from PyQt5.QtWidgets import QHeaderView 8 | import os 9 | 10 | path = os.getcwd() 11 | qtCreatorFile = os.path.join(path,'CMlib/show_sampletable.ui') 12 | 13 | Ui_showtable, QtBaseClass = uic.loadUiType(qtCreatorFile) 14 | 15 | class showtable(QtWidgets.QDialog, Ui_showtable): 16 | def __init__(self): 17 | QtWidgets.QDialog.__init__(self) 18 | Ui_showtable.__init__(self) 19 | self.setupUi(self) 20 | 21 | self.setWindowTitle('Group Information Table') 22 | self.checkbtn.clicked.connect(self.sampleEdit) 23 | print("open group table") 24 | 25 | def setuptable(self,pd): 26 | 27 | self.df = pd 28 | self.df = self.df.fillna("None") 29 | rown = len(self.df.index) 30 | coln = len(self.df.columns) 31 | self.model = QStandardItemModel(rown, 9) 32 | # labels = list(self.df.columns.values) 33 | # rown=len(self.df.index) 34 | # self.model = QStandardItemModel(rown,9) 35 | 36 | labels=['group','rep1','rep2','rep3','control','gene','strand','start','end'] 37 | ###判断格式 38 | if list(self.df.columns.values) != labels: 39 | self.showMessageBox("warning","wrong table!") 40 | return "wrong" 41 | else: 42 | 43 | self.model.setHorizontalHeaderLabels(labels) 44 | # self.tableView.resize(500,300) 45 | #下面代码让表格100填满窗口 46 | self.tableView.horizontalHeader().setStretchLastSection(True) 47 | self.tableView.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch) 48 | 49 | for row in range(rown): 50 | #print(self.df.loc[row].Sample) 51 | for column in range(9): 52 | item = QStandardItem(str(self.df.loc[row][labels[column]])) 53 | self.model.setItem(row, column, item) 54 | 55 | self.tableView.setModel(self.model) 56 | return "yes" 57 | 58 | def sampleEdit(self): 59 | self.close() ## 关闭窗口 60 | # ############## warning message ######### 61 | def showMessageBox(self, title, message): 62 | msgBox = QtWidgets.QMessageBox() 63 | msgBox.setIcon(QtWidgets.QMessageBox.Warning) 64 | msgBox.setWindowTitle(title) 65 | msgBox.setText(message) 66 | msgBox.setStandardButtons(QtWidgets.QMessageBox.Ok) 67 | msgBox.exec_() 68 | ################################################## 69 | 70 | 71 | 72 | 73 | if __name__ == "__main__": 74 | app = QtWidgets.QApplication(sys.argv) 75 | window = showtable() 76 | window.show() 77 | sys.exit(app.exec_()) -------------------------------------------------------------------------------- /CMlib/show_fasta.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from PyQt5 import uic,QtWidgets 4 | from PyQt5.QtGui import QStandardItemModel, QStandardItem 5 | from PyQt5.QtWidgets import QHeaderView 6 | import os 7 | 8 | path = os.getcwd() 9 | qtCreatorFile = os.path.join(path,'CMlib/show_fasta.ui') 10 | 11 | Ui_showfasta, QtBaseClass = uic.loadUiType(qtCreatorFile) 12 | 13 | class showfasta(QtWidgets.QDialog, Ui_showfasta): 14 | def __init__(self): 15 | QtWidgets.QDialog.__init__(self) 16 | Ui_showfasta.__init__(self) 17 | self.setupUi(self) 18 | 19 | self.setWindowTitle('Gene Sequence') 20 | self.checkbtn.clicked.connect(self.sampleEdit) 21 | print("open gene fasta") 22 | 23 | def setuptext(self,fasta): 24 | 25 | self.fasta=fasta 26 | if '>' in self.fasta[0]: 27 | self.string=str() 28 | for i in self.fasta: 29 | self.string +=str(i) 30 | self.textEdit.setText(self.string) 31 | return "yes" 32 | else: 33 | self.showMessageBox("warning", "This is not a Fasta file") 34 | return "wrong" 35 | # rown = len(self.df.index) 36 | # coln = len(self.df.columns) 37 | # self.model = QStandardItemModel(rown, 8) 38 | # # labels = list(self.df.columns.values) 39 | # # rown=len(self.df.index) 40 | # # self.model = QStandardItemModel(rown,9) 41 | # labels=['group','rep1','rep2','control','gene','strand','start','end'] 42 | # self.model.setHorizontalHeaderLabels(labels) 43 | # # self.tableView.resize(500,300) 44 | # #下面代码让表格100填满窗口 45 | # self.tableView.horizontalHeader().setStretchLastSection(True) 46 | # self.tableView.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch) 47 | # 48 | # for row in range(rown): 49 | # #print(self.df.loc[row].Sample) 50 | # for column in range(8): 51 | # item = QStandardItem(str(self.df.loc[row][labels[column]])) 52 | # self.model.setItem(row, column, item) 53 | # 54 | # self.tableView.setModel(self.model) 55 | 56 | def sampleEdit(self): 57 | self.close() ## 关闭窗口 58 | # ############## warning message ######### 59 | def showMessageBox(self, title, message): 60 | msgBox = QtWidgets.QMessageBox() 61 | msgBox.setIcon(QtWidgets.QMessageBox.Warning) 62 | msgBox.setWindowTitle(title) 63 | msgBox.setText(message) 64 | msgBox.setStandardButtons(QtWidgets.QMessageBox.Ok) 65 | msgBox.exec_() 66 | ################################################## 67 | 68 | 69 | 70 | if __name__ == "__main__": 71 | app = QtWidgets.QApplication(sys.argv) 72 | window = showfasta() 73 | window.show() 74 | sys.exit(app.exec_()) -------------------------------------------------------------------------------- /CMlib/show_barcodestable.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | 5 | from PyQt5 import uic,QtWidgets 6 | from PyQt5.QtGui import QStandardItemModel, QStandardItem 7 | from PyQt5.QtWidgets import QHeaderView 8 | import os 9 | 10 | path = os.getcwd() 11 | qtCreatorFile = os.path.join(path,'CMlib/show_sampletable.ui') 12 | 13 | Ui_showtable, QtBaseClass = uic.loadUiType(qtCreatorFile) 14 | 15 | class showtable(QtWidgets.QDialog, Ui_showtable): 16 | def __init__(self): 17 | QtWidgets.QDialog.__init__(self) 18 | Ui_showtable.__init__(self) 19 | self.setupUi(self) 20 | 21 | self.setWindowTitle('Barcode Information Table') 22 | self.checkbtn.setText("Confirm") 23 | self.checkbtn.clicked.connect(self.sampleEdit) 24 | print("open barcode infor table") 25 | self.edit = "" 26 | self.newtable = "" 27 | 28 | def setuptable(self,pd): 29 | 30 | self.df=pd 31 | rown = len(self.df.index) 32 | coln = len(self.df.columns) 33 | self.model = QStandardItemModel(rown, 4) 34 | # labels = list(self.df.columns.values) 35 | # rown=len(self.df.index) 36 | # self.model = QStandardItemModel(rown,9) 37 | labels=['Index','Sample','Barcode_L','Barcode_R'] 38 | ###判断格式 39 | if list(self.df.columns.values) != labels: 40 | # print(list(self.df.columns.values)) 41 | self.showMessageBox("warning", "wrong table!") 42 | return "wrong" 43 | else: 44 | self.model.setHorizontalHeaderLabels(labels) 45 | # self.tableView.resize(500,300) 46 | #下面代码让表格100填满窗口 47 | self.tableView.horizontalHeader().setStretchLastSection(True) 48 | self.tableView.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch) 49 | 50 | for row in range(rown): 51 | #print(self.df.loc[row].Sample) 52 | for column in range(4): 53 | item = QStandardItem(str(self.df.loc[row][labels[column]])) 54 | self.model.setItem(row, column, item) 55 | 56 | self.tableView.setModel(self.model) 57 | 58 | return "yes" 59 | 60 | def sampleEdit(self): 61 | rown = len(self.df.index) 62 | coln = len(self.df.columns) 63 | # labels = ['Index', 'Sample', 'Barcode_L', 'Barcode_R'] 64 | self.newtable = self.df 65 | for row in range(rown): 66 | for column in range(coln): 67 | item = self.model.item(row, column) 68 | text=item.text() 69 | self.newtable.iloc[row,column] = text 70 | 71 | self.showMessageBox("Success","The barcode table has been edited!") 72 | 73 | self.edit = "yes" 74 | 75 | def resulttest(self): 76 | return self.edit, self.newtable 77 | 78 | # return newtable, 79 | 80 | # ############## warning message ######### 81 | def showMessageBox(self, title, message): 82 | msgBox = QtWidgets.QMessageBox() 83 | msgBox.setIcon(QtWidgets.QMessageBox.Warning) 84 | msgBox.setWindowTitle(title) 85 | msgBox.setText(message) 86 | msgBox.setStandardButtons(QtWidgets.QMessageBox.Ok) 87 | msgBox.exec_() 88 | ################################################## 89 | def showfinishBox(self, title, message): 90 | msgBox = QtWidgets.QMessageBox() 91 | msgBox.setWindowTitle(title) 92 | msgBox.setIcon(QtWidgets.QMessageBox.Information) 93 | msgBox.setText(message) 94 | # msgBox.setDetailedText("The project has finished, please check the result!") 95 | msgBox.exec_() 96 | 97 | ################################################ 98 | 99 | 100 | 101 | if __name__ == "__main__": 102 | app = QtWidgets.QApplication(sys.argv) 103 | window = showtable() 104 | window.show() 105 | sys.exit(app.exec_()) -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # CRISPRMatchGUI 2 | ## CRISPRMatch is no longer actively maintained. 3 | ## We release new CRIPSR data analysis software: [CrisprStitch](https://zhangtaolab.org/software/crisprstitch) . 4 | ## Brief introduction 5 | The Graphical User Interface(GUI) for CRISPRMatch--An automatic calculation and visualization tool for high-throughput CRISPR genome-editing data analysis 6 | ## I. Requirements(软件所需依赖包) 7 | Anaconda
8 | python3
9 | bwa
10 | samtools
11 | FLASH
12 | pyqt5
13 | 14 | [**Note:**] Using `Anaconda` to Install all packages (`bwa,samtools,picard,FLASH`) ##应用conda统一安装即可,方便快捷 15 | 16 | ## II. Manually Install(手动安装,非虚拟机) 17 | CentOS Linux release 7.3.1611 (terminal) 18 | 1. Install Anaconda
19 | ``` 20 | $ yum install wget git ##安装git和wget程序 21 | $ mkdir /home/software ##创建下载软件的文件夹,这里以software为例 22 | $ cd /home/software ##进入到software文件夹下 23 | $ wget https://repo.continuum.io/archive/Anaconda3-5.0.1-Linux-x86_64.sh ##下载linux对应的conda版本,注意尽量选择3.5版本的! 24 | $ bash Anaconda3-5.0.1-Linux-x86_64.sh ##用bash命令安装conda 25 | ``` 26 | 2. Install required packages ##利用conda安装所有依赖包(建议用清华软件源镜像替换,缩短安装时间) 27 | ``` 28 | $ conda install bwa \ ##用\符号和回车分隔多个软件 29 | samtools \ 30 | pyqt=5.6 \ 31 | flash \ 32 | matplotlib \ 33 | pysam \ 34 | pandas \ 35 | argparse \ 36 | numpy \ 37 | ``` 38 | **Note:** To ensure the tool working, please using `Anaconda` to install all packages (`bwa,samtools,pyqt,FLASH ...`) 39 | 40 | 3. Download CRISPRMatchGUI and test ##下载本软件的软件包 41 | ``` 42 | $ cd /home/software ##进入到software文件夹下 43 | $ git clone https://github.com/zhangtaolab/CRISPRMatchGUI.git ##利用git方式下载本软件包 44 | $ cd /home/software/CRISPRMatchGUI/ ##进入本软件文件夹 45 | $ python3 /home/software/CRISPRMatchGUI/start.py ##使用python3打开软件包中的start.py程序,即可实现软件运行 46 | 47 | ``` 48 | ## III. Start running(运行方法) 49 | 1. Video manual(用户手册)
50 | 51 | >(1)CRISPRMatch虚拟机使用教程 52 | - Link: https://v.youku.com/v_show/id_XMzgwODc4ODQ2NA==.html?spm=a2h3j.8428770.3416059.1 53 | 54 | >(2)双端测序数据合并教程 55 | - Link: https://v.youku.com/v_show/id_XMzkzMTY5NTEwOA==.html?scm=20140719.manual.114461.video_XMzkzMTY5NTEwOA== 56 | 57 | >(3)拆分混池测序结果(带有barcode信息) 58 | - Link: https://v.youku.com/v_show/id_XMzkzMTY5MzY4NA==.html?scm=20140719.manual.114461.video_XMzkzMTY5MzY4NA== 59 | 60 | >(4)虚拟机读取usb设备(改方法可实现大数据集计算) 61 | - Link: https://v.youku.com/v_show/id_XMzk0MDgyMjA2MA==.html?scm=20140719.manual.114461.video_XMzk0MDgyMjA2MA== 62 | 63 | 2. Mirroring file for Windows (虚拟机下载地址)
64 | - Link: https://pan.baidu.com/s/1L8KPij9SP2Mp9v7RYgS5_w code: CPF1 65 | 66 | 3. Files for mutation calculation(编辑计算所需三个信息文件)
67 | - **File1**: Genome-editing target sequences 68 | [Fasta format example](https://github.com/zhangtaolab/CRISPRMatchGUI/tree/master/sample_test/Samples_gene.fa) 69 | - **File2**: NGS samples information 70 | *note*: 71 | For CRISPR-Cas9 system, the `'Note'` must contain `'gRNA'` label. 72 | For CRISPR-Cpf1 system, the `'Note'` must contain `'crRNA'` label. 73 | *example*: 74 | [sample information](https://github.com/zhangtaolab/CRISPRMatchGUI/tree/master/sample_test/sample_infor.csv) 75 | - **File3**: NGS group information 76 | *note*: At present, two repeats are supported
77 | *example*:
78 | [group information](https://github.com/zhangtaolab/CRISPRMatchGUI/tree/master/sample_test/group_info.csv) 79 | - **Note**: the information files `File1`, `File2` and `File3` are required! 80 |
81 | 82 | 4. Merge paired-end reads(运行双端测序数据合并程序)
83 | ``` 84 | $ cd /home/software/CRISPRMatchGUI/ ##进入本软件文件夹 85 | $ python3 /home/software/CRISPRMatchGUI/merge.py ##运行双端测序数据合并 86 | ``` 87 | - *example*:
88 | [paired-end reads](https://github.com/zhangtaolab/CRISPRMatchGUI/tree/master/merge_sample/) 89 |
90 | 91 | 5. Split sequencing file(运行拆分混池测序结果)
92 | ``` 93 | $ cd /home/software/CRISPRMatchGUI/ ##进入本软件文件夹 94 | $ python3 /home/software/CRISPRMatchGUI/split.py ##运行拆分混池测序程序 95 | ``` 96 | 97 | -------------------------------------------------------------------------------- /sample_test/Samples_gene.fa: -------------------------------------------------------------------------------- 1 | >OsPDS 2 | ATGgatactggctgcctgtcatctaTGAACATAACTGGAACCAGCCAAGCAAGATCTTTTGCGGGACAACTTCCTACTCATAGGTGCTTCGCAAGTAGCAGCATCCAAGCACTGAAAAGTAGTCAGCATGTGAGCTTTGGAGTGAAATCTCTTGTCTTAAGGAATAAAGGAAAAAGATTCCGTCGGAGGCTCGGTGCTCTACAGgttcaacctttgtactctattattgcctcacattccatctcttgtgaaaatatatttgattggcttttctgcagGTTGTTTGCCAGGACTTTCCAAGACCTCCACTAGAAAACACAATAAACTTTTTGGAAGCTGGACAACTATCTTCATTTTTCAGAAACAGTGAACAACCCACTAAACCATTACAGGTCGTGATTGCTGGAGCAGgtatgatataattctaggatttgacagatgaataatttacatatatatctaactttgatagcagtcacatcgtggtcttagcattgtagtttttagctttgatttttttttcagGATTAGCTGGTTTATCAACGGCAAAATATCTGGCAGATGCTGGTCATAAACCCATATTGCTTGAGGCAAGGGATGTTTTGGGTGGAAAGgttttactcttatgcttttatgttgcatttaattttttttgttattcattctttttttttttggttgcctttatcttaatagctcatattcactgttagtagcatttgtggattattgtttttttttttggggaaatgccttgaacagATAGCTGCTTGGAAGGATGAAGATGGAGATTGGTATGAAACTGGGCTTCATATCTTTTgtaagtaataactctggatttttaaggttctcgttgtgctatattttatttaggttattaccgccagcactgatagatatctctaagggttttgaacaaaaaaacatgtatcaaactctttcatcgataaggtagaaatgccatgcgggaagtatgaagtgatgtctgaggattaacacacatggtagttttattttgtaagaaacttttagattggtttttttcacagtactaaaaagtaactttttactagcttatatggttgataaattttaacgtcacataaatatcatgagctaattgaatataaatcctcctgttcatacatagtcttctttcaacctactattcccttccaaacatatatgaatatgacagatactgtttttccttccatgctcacactgttttgtcgtccacaacagtacatatgtgacattgttcattttgtgcctgtatgtaaccatatacctttttggtttaagTTGGAGCTTATCCCAACATACAGAACTTGTTTGGCGAGCTTGGTATTAATGATCGGTTGCAATGGAAGGAACACTCCATGATATTTGCCATGCCAAACAAGCCAGGAGAATTCAGCCGGTTTGATTTTCCTGAAACATTGCCTGCACCCTTAAATGgtgagatcatatgcagcgctggagttgtttaattaaaccaagattcccagaagtacatcgtattggtggttacttttgttttactaacacatgactgtaattagggggtatattactagcaacgttaatgatagatcaatagatcatgccatggagcttttatgttgtcaattgatgcctatttattatttatcattgatcatgcgtgcatttaacagGAATATGGGCCATACTAAGAAACAATGAAATGCTAACTTGGCCAGAGAAGGTGAAGTTTGCTCTTGGACTTTTGCCAGCAATGGTTGGTGGCCAAGCTTATGTTGAAGCTCAAGATGGTTTTACTGTTTCTGAGTGGATGAAAAAGCAGgtataagttcacaatatcagtttgtcaagtctctgtgtacaagacacatttctacctcattaatttggaatggatataggagaaggtgttgtaagctagaaaaccttttattttctaataaaaaaactgatgccctttattgttgcattcacattgggaagaactggcagttctgaggatgaaatgcttcatgtactcaagtttatgccctttattttgcccagatccttttgcacaggtttaagcttgagctatgcttttagtttaagaccactgtttcagttaaaggtcaacaaccttgcatgatttcttcctccacctagaaaagccattgcacatattgacaaagcacacaatcctgttgactatattctttatgagctaatatacagaactgttttatacagaaaacacaatacatatgctatagttatcaatctctttccctttttttgggataacggattaatatggtgcctgatacagttgtttgatcagcacagGGTGTTCCTGATCGAGTGAACGATGAGGTTTTCATTGCAATGTCAAAGGCACTTAATTTCATAAATCCTGATGAGTTATCCATGCAGTGCATTCTGATTGCTTTAAACCGATTTCTTCAGgtatttattatgttgctctatggtcatgtgtgttgcatatgagtaattcttctgttctttccggagtagtaccttacgtattacatccttcttagtgtttcttgtctctgttgtttcctaccttgaggaaactcaaatgaattttcgcttagaggccttttaaaaaaaattatgcaaatgtgtagGAGAAGCATGGTTCTAAGATGGCATTCTTGGATGGTAATCCTCCTGAAAGGTTATGCATGCCTATTGTTGACCATGTTCGCTCTTTGGGTGGTGAGGTTCGGCTGAATTCTCGTATTCAGAAAATAGAACTTAATCCTGATGGAACAGTGAAACACTTTGCACTTACTGATGGAACTCAAATAACTGGAGATGCTTATGTTTTTGCAACACCAGgtgattttctacaatctttgtttcttctgcagttcataaattatatatatgcggctactcattttaactgactagcctgtatttagTTGATATCTTGAAGCTTCTTGTACCTCAAGAGTGGAAAGAAATATCTTATTTCAAGAAGCTGGAGAAGTTGGTGGGAGTTCCTGTTATAAATGTTCATATATGgttggttggttgaattatttggttccaagtcggaaattactcatcatcgagtttgtggttctccttatgactcatattagtatttctgttggtttgaacatttcagGTTTGATAGAAAACTGAAGAACACATATGACCACCTTCTTTTCAGCAGgtgtctcttctaattcctcatcagttttgctgtcctttcactgcctcatgcatttgctctgtgctatgactggtttatgaactaaaacgatttgtattgcccaaattgggcacattctatcctgattttgtatacattcttgattaataccaaatatcatatgtcccatgtattgatcttgttcccttttctttcagGAGTTCACTTTTAAGTGTTTATGCGGACATGTCAGTAACTTGCAAGgtactaactaggagacattatatgttacgaaatagtaactatctgtcatgtattattgctcttgtgtatttgttcttgggtttaccatcttcaagcatcacatgatatttattttagtagctgtaacaaaaggcccaaaagtgcatgtgttacagaaggaatccagtattaattattaaacttggaaagtagatatattttatttcagattcatttaggcaacatgtcacttggctctagagtctagattttatggaccataatagctcaggaaattaaagacatggatgcctactgaacggttttctttccttttgttttgaactctttacagGAATACTATGATCCAAACCGTTCAATGCTGGAGTTGGTCTTTGCTCCTGCAGAGGAATGGGTTGGACGGAGTGACACTGAAATCATCGAAGCAACTATGCAAGAGCTAGCCAAGCTATTTCCTGATGAAATTGCTGCTGATCAGAGTAAAGCAAAGATTCTGAAGTATCATGTTGTGAAGACACCAAGgtgaggacattttgcaagagcgccccctatctgatatatcataggtaggtctaatagttggatgcacacttctctcacgttcctttcttttctgtctcactgttacagATCTGTTTACAAGACTATCCCGGACTGTGAACCTTGCCGACCTCTGCAAAGATCACCGATTGAAGGGTTCTATCTAGCTGGTGACTACACAAAGCAGAAATATTTGGCTTCGATGGAGGGTGCAGTTCTATCTGGGAAGCTTTGTGCTCAGTCTGTAGTGGAGgtaaacgctgctctccatggttctgtttgtacatagatgcatcagacttgtattgttgtcttggtgcagttcacaatgattcagttttgtaggctaatgagttatcacttgctgatttcag 3 | 4 | >OsDEP1 5 | ggcataataatctgtactactgccaaactgagcttttacatggtgaaaatattttccctgcagatcaaaattgtgtatctgcatttcatgtctttgctactgttgcaagtgctcacccaagtgcaaaagaccaaggtgcctcaattgttcttgcagctcatgctgcgacgagccatgctgtaagccaaactgcagtgcgtgctgcgctgggtcatgctgtagtccagactgctgctcatgctgtaaacctaactgcagttgctgcaagaccccttc 6 | 7 | >OsROC5 8 | aggtttgggctaatgtcctccacatttcgcaccgtatacttatgttccgttccaatcctataatgtactaatgttggtgttacttgcattcatttcacagagacggtggtctgacatgttctcgtgcatgattgctaaggcaacagtgcttgaggaggtgtctaccggcattgcaggaagcagaaatggcgcgttgctgctggtgagtgctgatcaacagtgctaatgttcatttatcttacatagtgtaagacgtagctaacatattttctttctgaattgttaatttttcttgtgtttgcttgcaacagatgaaggctgagctacaggtg -------------------------------------------------------------------------------- /CMlib/output_aln_pdf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pysam 3 | from pyfasta import Fasta 4 | import matplotlib 5 | from scipy import stats 6 | import matplotlib.pyplot as plt 7 | import pandas as pd 8 | import numpy as np 9 | import re 10 | from glob import glob 11 | 12 | def alnpdftest(infofile, output, refname, groupinfo): 13 | """ 14 | 15 | :param infofile: a description file of details of each sample, example: sample_infor.txt 16 | :param output: a description file of details of each group, example: group_infor.txt 17 | :return: 18 | """ 19 | info = pd.read_csv(infofile, index_col="Index") 20 | fa = Fasta(refname) 21 | groupinfor = pd.read_table(groupinfo) 22 | groupinfor.ix[:, pd.isnull(groupinfor).all()] = "UNKNOWN" 23 | groupinfor = groupinfor.fillna("UNKNOWN") ##填充表格中NaN处 24 | stranddict = dict() 25 | for idy in groupinfor.index: 26 | stranddict[groupinfor.loc[idy].rep1] = groupinfor.loc[idy].strand 27 | stranddict[groupinfor.loc[idy].rep2] = groupinfor.loc[idy].strand 28 | stranddict[groupinfor.loc[idy].control] = groupinfor.loc[idy].strand 29 | 30 | 31 | for idx in info.index: 32 | 33 | #bamname = os.path.join(bamdir, info.loc[idx].Note+'.bam') 34 | #print("Calculating",bamname) 35 | 36 | note = info.loc[idx].Note 37 | genename = info.loc[idx]['gene_name'] 38 | strand = stranddict[note] 39 | 40 | 41 | if (re.search("gRNA", info.loc[idx].Note)): 42 | if strand == '+': 43 | start = info.loc[idx]['start'] - 10 44 | end = info.loc[idx]['end'] + 10 45 | 46 | else: 47 | start = info.loc[idx]['start'] - 10 48 | end = info.loc[idx]['end'] + 10 49 | 50 | elif (re.search("crRNA", info.loc[idx].Note)): 51 | if strand == '+': 52 | start = info.loc[idx]['start'] 53 | end = info.loc[idx]['end'] + 30 54 | 55 | else: 56 | start = info.loc[idx]['start'] - 30 57 | end = info.loc[idx]['end'] 58 | 59 | alnfile = os.path.join(output, info.loc[idx].Note + '_aln.txt') 60 | outfile = os.path.join(output, info.loc[idx].Note + '_aln.test.pdf') 61 | 62 | delfile= os.path.join(output, info.loc[idx].Note + '_del_aln.txt') 63 | snpfile= os.path.join(output, info.loc[idx].Note + '_snp_aln.txt') 64 | 65 | catcmd = ' '.join(['cat', delfile, snpfile, '>', alnfile]) 66 | #print(catcmd) 67 | os.system(catcmd) 68 | 69 | #print("start output", alnfile, "figure") 70 | if os.path.getsize(alnfile): ## check aln file 71 | print("start output", alnfile, "figure") 72 | else: 73 | print("error", alnfile, "figure") 74 | continue 75 | data = pd.read_table(alnfile, header=None) # nrows=400,只读前400行,usecols=(0,1,2,5,6)只提取0,1,2,5,6列 76 | # print(len(data.columns)) ##统计列数 77 | # print(len(data.index)) ##统计行数 78 | withset = len(data.columns) * 2 + 10 79 | heightset = len(data.index) * 2 + 10 80 | # print(withset) 81 | # print(heightset) 82 | fig, ax = plt.subplots() 83 | fig.set_size_inches(0.01 * withset, 0.01 * heightset) 84 | #fig.set_size_inches(12, 18) 85 | ax.set_title(info.loc[idx].Note, size=2,fontdict={'family': 'sans-serif'}) 86 | ax.set_ylim(0, heightset) 87 | ax.set_xlim(0, withset) 88 | ax.set_yticks([]) ##去掉刻度线 89 | ax.set_xticks([]) 90 | ax.spines['left'].set_visible(False) ##设置边框可见性 ax.spines['left'].set_linewidth(0)可设置边框粗细 91 | ax.spines['bottom'].set_visible(False) 92 | ax.spines['right'].set_visible(False) 93 | ax.spines['top'].set_visible(False) 94 | ypos = 5 95 | 96 | seq = fa[genename][start - 1:end].upper() ##reference sequence 97 | seqlist = list() 98 | 99 | for nt in seq: 100 | seqlist.append(nt) 101 | for x in data.index: # 逐行读取txt 序列 102 | # print(x,len(data.loc[x])) 103 | n = 1 104 | xpos = 5 105 | ax.text(4, ypos + 1, data.loc[x][0], size=1, horizontalalignment='right', verticalalignment='center', ) 106 | 107 | while n < len(data.loc[x]): 108 | if (data.loc[x][n] == seq[n-1]): 109 | if (data.loc[x][n] == "A"): 110 | color = "red" 111 | elif (data.loc[x][n] == "T"): 112 | color = "blue" 113 | elif (data.loc[x][n] == "G"): 114 | color = "green" 115 | elif (data.loc[x][n] == "C"): 116 | color = "orange" 117 | else: 118 | color = "white" 119 | # print("n=",n, "data=",data.loc[x][n], "color=", color) 120 | else: 121 | color = "white" 122 | 123 | ax.broken_barh([(xpos, 2)], (ypos, 2), facecolors=color, alpha=0.2) 124 | ax.text(xpos + 1, ypos + 1, data.loc[x][n], size=1, horizontalalignment='center', 125 | verticalalignment='center') 126 | n += 1 127 | xpos += 2 128 | ypos += 2 129 | # plt.show() 130 | plt.savefig(outfile, dpi=300, format="pdf") 131 | plt.close(fig) 132 | print(outfile, "have finished") -------------------------------------------------------------------------------- /CMlib/change_color.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from PyQt5 import QtWidgets 3 | from PyQt5.QtWidgets import * 4 | from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas 5 | from matplotlib.backends.backend_qt5 import NavigationToolbar2QT as NavigationToolbar 6 | from matplotlib.ticker import MultipleLocator, FormatStrFormatter 7 | 8 | import matplotlib.pyplot as plt 9 | from PyQt5.QtCore import Qt 10 | import random 11 | from os import path 12 | import pandas as pd 13 | import numpy as np 14 | import os 15 | 16 | 17 | class Changecolor(QtWidgets.QDialog): 18 | def __init__(self, parent=None): 19 | super(Changecolor, self).__init__(parent) 20 | 21 | 22 | 23 | 24 | def deletion_ratio(self,sample,reg,regPAM,colorstring): 25 | 26 | self.color = colorstring 27 | 28 | 29 | self.figure = plt.figure(figsize=(8, 6)) 30 | self.canvas = FigureCanvas(self.figure) 31 | self.toolbar = NavigationToolbar(self.canvas, self) 32 | 33 | self.layout = QtWidgets.QVBoxLayout() 34 | self.layout.addWidget(self.toolbar) 35 | self.layout.addWidget(self.canvas) 36 | 37 | glabels = list(reg.fillna(" ").label) 38 | 39 | self.ax = self.figure.add_subplot(111) 40 | self.ax.bar(reg.index, reg.ratio, color =self.color.name()) 41 | self.ax.set_title(sample,fontdict = {'family': 'Arial'}, size = 15) 42 | # print(self.seqlistother) 43 | # print(self.seqlistother[1]) 44 | self.ax.set_xticks(reg.index,minor=True) 45 | self.ax.set_xticklabels(glabels, color="black", minor=True, fontdict = {'family': 'Arial','weight' : 'bold'}, size = 12) # minor=True表示次坐标轴 46 | self.ax.set_xticks(regPAM.index) 47 | self.ax.set_xticklabels(regPAM.label, color="red", fontdict = {'family': 'Arial','weight' : 'bold'}, size = 12) 48 | plt.ylabel('Deletion Ratio', fontdict = {'family': 'Arial'}, size = 15) 49 | self.setLayout(self.layout) 50 | self.show() 51 | 52 | def deletion_group_ratio(self, groupname, regmean, stdrr, glabels, regPAM, regck, y_ck, ckname, colorstring): 53 | print(groupname) 54 | 55 | self.color = colorstring 56 | 57 | 58 | self.figure = plt.figure(figsize=(16, 6)) 59 | self.canvas = FigureCanvas(self.figure) 60 | self.toolbar = NavigationToolbar(self.canvas, self) 61 | 62 | self.layout = QtWidgets.QVBoxLayout() 63 | self.layout.addWidget(self.toolbar) 64 | self.layout.addWidget(self.canvas) 65 | 66 | self.ax0 = self.figure.add_subplot(1, 2, 1) 67 | y = regmean 68 | y_std = stdrr 69 | self.ax0.bar(regmean.index, y, color=self.color.name()) 70 | # add errorbar, elinewidth:errorbar line with; capsize/capthick:上下横线长短/粗细,ls:linestyle='None'去掉连接线。 ecolor: errorbar line color 71 | self.ax0.errorbar(regmean.index, y, yerr=y_std, fmt='', elinewidth=0.5, capsize=2, capthick=0.5, ls='None', 72 | ecolor='black') 73 | self.ax0.set_title(groupname, fontdict={'family': 'Times New Roman'}, size=15) 74 | 75 | self.ax0.set_xticks(regmean.index, minor=True) 76 | self.ax0.set_xticklabels(glabels, color="black", minor=True, fontdict={'family': 'Arial', 'weight': 'bold'}, 77 | size=12) # minor=True表示次坐标轴 78 | self.ax0.set_xticks(regPAM.index) 79 | self.ax0.set_xticklabels(regPAM.label, color="red", fontdict={'family': 'Arial', 'weight': 'bold'}, size=12) 80 | 81 | self.ax0.set_ylabel('Deletion Ratio', fontdict={'family': 'Times New Roman'}, size=15) 82 | 83 | self.ax1 = self.figure.add_subplot(1, 2, 2) 84 | v = self.ax0.axis() ##返回子图1的坐标范围 85 | self.ax1.axis(v) ##设置子图2的坐标范围 86 | 87 | self.ax1.bar(regck.index, y_ck, color='grey') 88 | self.ax1.set_title(ckname, fontdict={'family': 'Times New Roman'}, size=15) 89 | self.ax1.set_xticks(regck.index, minor=True) 90 | self.ax1.set_xticklabels(glabels, color="black", minor=True, fontdict={'family': 'Arial', 'weight': 'bold'}, 91 | size=12) 92 | self.ax1.set_xticks(regPAM.index) 93 | self.ax1.set_xticklabels(regPAM.label, color="red", fontdict={'family': 'Arial', 'weight': 'bold'}, size=12) 94 | 95 | self.setLayout(self.layout) 96 | self.show() 97 | 98 | def deletion_size(self, groupname, x, sizereg,colorstring): 99 | 100 | self.color = colorstring 101 | 102 | 103 | self.figure = plt.figure(figsize=(8, 6)) 104 | self.canvas = FigureCanvas(self.figure) 105 | self.toolbar = NavigationToolbar(self.canvas, self) 106 | 107 | self.layout = QtWidgets.QVBoxLayout() 108 | self.layout.addWidget(self.toolbar) 109 | self.layout.addWidget(self.canvas) 110 | 111 | 112 | self.ax = self.figure.add_subplot(111) 113 | ymajorFormatter = FormatStrFormatter('%1.1f') ## 设置坐标轴格式 114 | self.ax.yaxis.set_major_formatter(ymajorFormatter) 115 | self.ax.bar(x, sizereg.ratio_mean, color=self.color.name()) 116 | # add errorbar, elinewidth:errorbar line with; capsize/capthick:上下横线长短/粗细,ls:linestyle='None'去掉连接线。 ecolor: errorbar line color 117 | self.ax.errorbar(x, sizereg.ratio_mean, yerr=sizereg.ratio_stdrr, fmt='', elinewidth=0.5, capsize=2, 118 | capthick=0.5, ls='None', ecolor='black') 119 | self.ax.set_title(groupname, size=15, fontdict={'family': 'Times New Roman'}) 120 | self.ax.set_ylabel('Deletion Size (%)', size=15, fontdict={'family': 'Times New Roman'}) 121 | self.ax.set_xticks(x) 122 | self.ax.set_xticklabels(sizereg.Index, rotation=35, fontdict={'family': 'Arial'}, size=12) 123 | 124 | self.setLayout(self.layout) 125 | self.show() 126 | 127 | if __name__ == '__main__': 128 | app = QtWidgets.QApplication(sys.argv) 129 | main = Changecolor() 130 | main.setWindowTitle('Change Color Window') 131 | main.show() 132 | sys.exit(app.exec_()) -------------------------------------------------------------------------------- /merge.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | 5 | from PyQt5 import uic,QtWidgets 6 | from PyQt5.QtGui import QStandardItemModel, QStandardItem 7 | from PyQt5.QtWidgets import QHeaderView 8 | import os 9 | 10 | from subprocess import Popen, PIPE 11 | 12 | path = os.getcwd() 13 | qtCreatorFile = os.path.join(path,'CMlib/flash_merge.ui') 14 | 15 | Ui_showtable, QtBaseClass = uic.loadUiType(qtCreatorFile) 16 | 17 | class showtable(QtWidgets.QDialog, Ui_showtable): 18 | def __init__(self): 19 | QtWidgets.QDialog.__init__(self) 20 | Ui_showtable.__init__(self) 21 | self.setupUi(self) 22 | 23 | self.setWindowTitle('Merge FastQ') 24 | self.leftbtn.clicked.connect(lambda: self.getfastq("left")) 25 | self.left.setReadOnly(True) ##设置不可输入 26 | self.rightbtn.clicked.connect(lambda: self.getfastq("right")) 27 | self.right.setReadOnly(True) 28 | self.outputbtn.clicked.connect(self.outputdir) 29 | self.output.setReadOnly(True) 30 | self.pushButton.clicked.connect(self.merge) 31 | 32 | 33 | 34 | 35 | def getfastq(self,file): 36 | fastqPath, _ = QtWidgets.QFileDialog.getOpenFileName(self, 'Open file', path) 37 | if fastqPath != "": 38 | if file == "left": 39 | print("fastq Direction", fastqPath) 40 | self.left.setText(fastqPath) 41 | self.leftfastq = fastqPath 42 | if file == "right": 43 | print("fastq Direction", fastqPath) 44 | self.right.setText(fastqPath) 45 | self.rightfastq = fastqPath 46 | 47 | def outputdir(self): 48 | outputdirpath = QtWidgets.QFileDialog.getExistingDirectory(self, 'open directory', path) 49 | if outputdirpath != "": 50 | print("Direction", outputdirpath) 51 | self.outputdirpath = outputdirpath 52 | self.output.setText(outputdirpath) 53 | self.outputfiledir = outputdirpath 54 | 55 | def merge(self): 56 | outname = self.name.text().rstrip() 57 | threadnumber = self.spinBox.value() 58 | if outname != "" and self.leftfastq != "" and self.rightfastq != "" and self.outputfiledir != "": 59 | flashpath = self.which('flash') 60 | if flashpath: 61 | flashversion = self.flash('flash') 62 | if flashversion == 'None': 63 | self.showMessageBox("warning","Please input flash directory") 64 | else: 65 | flashbin=flashpath[0] 66 | flashcmd = ' '.join([flashbin, '-o', outname, '-t', str(threadnumber), '-d', self.outputfiledir, self.leftfastq, self.rightfastq, '2>&1 | tee', os.path.join(self.outputfiledir, outname + '_flash.log')]) 67 | print(flashcmd) 68 | runflash = Popen(flashcmd, shell=True) 69 | runflash.communicate() 70 | 71 | 72 | msgBox = QtWidgets.QMessageBox() 73 | msgBox.setWindowTitle("Information") 74 | msgBox.setIcon(QtWidgets.QMessageBox.Information) 75 | msgBox.setText("Project Done!") 76 | msgBox.setDetailedText(''.join(['File ',outname,'.extendedFrags.fastq ','is located in ',self.outputfiledir,'/'])) 77 | msgBox.setStandardButtons(QtWidgets.QMessageBox.Ok) 78 | msgBox.exec_() 79 | 80 | else: 81 | self.showMessageBox("warning","Please set output name!") 82 | 83 | def flash(self,filename): 84 | """ 85 | :param filename: 86 | :return: flash version 87 | """ 88 | flashpath = self.which(filename) 89 | flashcmd = ' '.join([flashpath[0], '--version']) 90 | # location= samtoolspath[0] 91 | flashrun = Popen(flashcmd, stdout=PIPE, stderr=PIPE, shell=True) 92 | i = flashrun.stdout.readlines()[0] 93 | version = i.decode('utf-8').rstrip('\n') 94 | flashrun.communicate() 95 | return version 96 | 97 | def which(self,filename): 98 | """docstring for which""" 99 | locations = os.environ.get("PATH").split(os.pathsep) 100 | candidates = [] 101 | for location in locations: 102 | candidate = os.path.join(location, filename) 103 | if os.path.isfile(candidate): 104 | candidates.append(candidate) 105 | return candidates 106 | 107 | 108 | # ############## warning message ######### 109 | def showMessageBox(self, title, message): 110 | msgBox = QtWidgets.QMessageBox() 111 | msgBox.setIcon(QtWidgets.QMessageBox.Warning) 112 | msgBox.setWindowTitle(title) 113 | msgBox.setText(message) 114 | msgBox.setStandardButtons(QtWidgets.QMessageBox.Ok) 115 | msgBox.exec_() 116 | ################################################## 117 | 118 | # self.checkbtn.clicked.connect(self.sampleEdit) 119 | # print("open group table") 120 | # 121 | # def setuptable(self,pd): 122 | # 123 | # self.df=pd 124 | # rown = len(self.df.index) 125 | # coln = len(self.df.columns) 126 | # self.model = QStandardItemModel(rown, 8) 127 | # # labels = list(self.df.columns.values) 128 | # # rown=len(self.df.index) 129 | # # self.model = QStandardItemModel(rown,9) 130 | # labels=['group','rep1','rep2','control','gene','strand','start','end'] 131 | # self.model.setHorizontalHeaderLabels(labels) 132 | # # self.tableView.resize(500,300) 133 | # #下面代码让表格100填满窗口 134 | # self.tableView.horizontalHeader().setStretchLastSection(True) 135 | # self.tableView.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch) 136 | # 137 | # for row in range(rown): 138 | # #print(self.df.loc[row].Sample) 139 | # for column in range(8): 140 | # item = QStandardItem(str(self.df.loc[row][labels[column]])) 141 | # self.model.setItem(row, column, item) 142 | # 143 | # self.tableView.setModel(self.model) 144 | # 145 | # def sampleEdit(self): 146 | # self.close() ## 关闭窗口 147 | 148 | 149 | 150 | 151 | if __name__ == "__main__": 152 | app = QtWidgets.QApplication(sys.argv) 153 | window = showtable() 154 | window.show() 155 | sys.exit(app.exec_()) -------------------------------------------------------------------------------- /split.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | 5 | from PyQt5 import uic,QtWidgets 6 | from CMlib.show_barcodestable import showtable as showbarcode 7 | from multiprocessing import Pool 8 | from functools import partial 9 | from CMlib.split_fastq import split_fastq 10 | from PyQt5.QtWidgets import QHeaderView, QPushButton,QProgressDialog 11 | from PyQt5.QtCore import Qt, QBasicTimer 12 | import os 13 | 14 | from subprocess import Popen, PIPE 15 | 16 | path = os.getcwd() 17 | qtCreatorFile = os.path.join(path,'CMlib/split_lanes.ui') 18 | 19 | Ui_showtable, QtBaseClass = uic.loadUiType(qtCreatorFile) 20 | 21 | class showtable(QtWidgets.QDialog, Ui_showtable): 22 | def __init__(self): 23 | QtWidgets.QDialog.__init__(self) 24 | Ui_showtable.__init__(self) 25 | self.setupUi(self) 26 | 27 | self.setWindowTitle('Split FastQ') 28 | self.resize(500,400) 29 | # self.fastqbtn.clicked.connect(lambda: self.getfastq("left")) 30 | self.fastqbtn.clicked.connect(self.getfastq) 31 | self.fastqline.setReadOnly(True) ##设置不可输入 32 | # self.rightbtn.clicked.connect(lambda: self.getfastq("right")) 33 | # self.right.setReadOnly(True) 34 | self.barcodebtn.clicked.connect(self.barcodeinfo) 35 | self.barcodeline.setReadOnly(True) 36 | self.outputbtn.clicked.connect(self.outputdir) 37 | self.outputline.setReadOnly(True) 38 | 39 | self.showbtn.clicked.connect(self.showtable) 40 | self.splitbtn.clicked.connect(self.split) 41 | self.resetbtn.clicked.connect(self.reset) 42 | 43 | self.path1 = "" 44 | self.path2 = "" 45 | self.path1check = "" 46 | self.edit = "" 47 | 48 | 49 | def getfastq(self): 50 | fastqPath, _ = QtWidgets.QFileDialog.getOpenFileName(self, 'Open file', path) 51 | if fastqPath != "": 52 | print("fastq Direction", fastqPath) 53 | self.fastqline.setText(fastqPath) 54 | # self.fastq = fastqPath 55 | self.path2 = fastqPath 56 | 57 | 58 | 59 | 60 | def barcodeinfo(self): 61 | barcodepath, _ = QtWidgets.QFileDialog.getOpenFileName(self, 'Open file', path) 62 | if barcodepath != "": 63 | print("Direction", barcodepath) 64 | self.barcodeline.setText(barcodepath) 65 | self.dfbarcode = pd.read_csv(str(barcodepath)) 66 | self.path1 = barcodepath 67 | 68 | def outputdir(self): 69 | outputdirpath = QtWidgets.QFileDialog.getExistingDirectory(self, 'open directory', path) 70 | if outputdirpath != "": 71 | print("Direction", outputdirpath) 72 | self.outputdirpath = outputdirpath 73 | self.outputline.setText(outputdirpath) 74 | self.outputfiledir = outputdirpath 75 | 76 | 77 | def showtable(self): 78 | if self.path1 !="": 79 | self.ui = showbarcode() ##打开showtable新窗口 80 | result = self.ui.setuptable(self.dfbarcode) ##传递倒入sample csv 81 | 82 | if result == "yes": 83 | self.ui.show() ##显示窗 84 | # self.newdfbarcode, self.edit = self.ui.sampleEdit() 85 | 86 | # print(newdf) 87 | self.path1check = self.path1 88 | else: 89 | self.path1check = "" 90 | self.path1 = "" 91 | else: 92 | self.showMessageBox('Warning', 'Please load Sample information Table first') 93 | self.path1check="" 94 | 95 | def reset(self): 96 | self.path1 = "" 97 | self.path2 = "" 98 | self.path1check = "" 99 | self.edit = "" 100 | 101 | 102 | def split(self): 103 | if self.path2 != "": 104 | if self.path1check != "": 105 | self.edit, self.newdfbarcode = self.ui.resulttest() ##check table has fixed 106 | if self.edit == "yes": 107 | self.showbarprocess("Prepare for splitting...") 108 | # self.figures = Example() 109 | # self.figures.initUI() 110 | # self.figures.show() 111 | 112 | pool = Pool(4) 113 | pool.map(partial(split_fastq, df=self.newdfbarcode, fastq=self.path2, output=self.outputfiledir), 114 | list(self.newdfbarcode.index)) 115 | # pool.map(partial(split_fastq,df=self.dfbarcode,fastq=self.path2,output=self.outputfiledir),list(self.dfbarcode.index)) 116 | print('done') 117 | # self.showMessageBox('Warning', 'Please click show buttons for information checking') 118 | self.showfinishBox('Information','The project have been done!') 119 | else: 120 | self.showMessageBox('Warning', 'Please click "Confirm" button for barcodes checking') 121 | self.edit = "" 122 | 123 | 124 | else: 125 | self.showMessageBox('Warning', 'Please click show buttons for information checking and confirming') 126 | self.path1check = "" 127 | else: 128 | self.showMessageBox('Warning', 'Please load fastq file first') 129 | self.path2 = "" 130 | 131 | 132 | 133 | # ############## warning message ######### 134 | def showMessageBox(self, title, message): 135 | msgBox = QtWidgets.QMessageBox() 136 | msgBox.setIcon(QtWidgets.QMessageBox.Warning) 137 | msgBox.setWindowTitle(title) 138 | msgBox.setText(message) 139 | msgBox.setStandardButtons(QtWidgets.QMessageBox.Ok) 140 | msgBox.exec_() 141 | ################################################## 142 | 143 | def showfinishBox(self, title, message): 144 | msgBox = QtWidgets.QMessageBox() 145 | msgBox.setWindowTitle(title) 146 | msgBox.setIcon(QtWidgets.QMessageBox.Information) 147 | msgBox.setText(message) 148 | msgBox.setDetailedText("The project has finished, please check the result!") 149 | msgBox.exec_() 150 | 151 | ################################################ 152 | 153 | def showbarprocess(self,content): 154 | 155 | num = int(100000) 156 | progress = QProgressDialog(parent=self) 157 | progress.setWindowTitle("Start Processing ...") 158 | progress.setLabelText(content) 159 | # progress.setCancelButtonText("0") 160 | progress.setCancelButton(None) ##不显示cancel button 161 | progress.setMinimumDuration(5) 162 | progress.setWindowModality(Qt.WindowModal) 163 | progress.setRange(0, num) 164 | 165 | for i in range(num): 166 | progress.setValue(i) 167 | else: 168 | progress.setValue(num) 169 | 170 | progress.cancel() ##直接关闭 171 | 172 | 173 | if __name__ == "__main__": 174 | app = QtWidgets.QApplication(sys.argv) 175 | window = showtable() 176 | window.show() 177 | sys.exit(app.exec_()) -------------------------------------------------------------------------------- /CMlib/plotfigures.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from PyQt5 import QtWidgets 3 | from PyQt5.QtWidgets import * 4 | from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas 5 | from matplotlib.backends.backend_qt5 import NavigationToolbar2QT as NavigationToolbar 6 | from matplotlib.ticker import MultipleLocator, FormatStrFormatter 7 | 8 | import matplotlib.pyplot as plt 9 | from PyQt5.QtCore import Qt 10 | from CMlib.change_color import Changecolor 11 | import random 12 | from os import path 13 | import pandas as pd 14 | import numpy as np 15 | import os 16 | 17 | 18 | class Window(QtWidgets.QDialog): 19 | def __init__(self, parent=None): 20 | super(Window, self).__init__(parent) 21 | 22 | 23 | 24 | ############plot deletion ratio bar################################### 25 | def deletion_ratio(self,sample,reg,regPAM): 26 | 27 | self.button = QPushButton('Color', self) 28 | 29 | self.button.move(20, 20) 30 | 31 | self.figure = plt.figure(figsize=(8, 6)) 32 | self.canvas = FigureCanvas(self.figure) 33 | self.toolbar = NavigationToolbar(self.canvas, self) 34 | 35 | self.layout = QtWidgets.QVBoxLayout() 36 | self.layout.addWidget(self.toolbar) 37 | self.layout.addWidget(self.canvas) 38 | self.layout.addWidget(self.button) 39 | 40 | glabels = list(reg.fillna(" ").label) 41 | 42 | self.button.clicked.connect(lambda: self.showDialog(sample,reg,regPAM)) 43 | self.ax = self.figure.add_subplot(111) 44 | self.ax.bar(reg.index, reg.ratio, color='blue') 45 | self.ax.set_title(sample,fontdict = {'family': 'Arial'}, size = 15) 46 | # print(self.seqlistother) 47 | # print(self.seqlistother[1]) 48 | self.ax.set_xticks(reg.index,minor=True) 49 | self.ax.set_xticklabels(glabels, color="black", minor=True, fontdict = {'family': 'Arial','weight' : 'bold'}, size = 12) # minor=True表示次坐标轴 50 | self.ax.set_xticks(regPAM.index) 51 | self.ax.set_xticklabels(regPAM.label, color="red", fontdict = {'family': 'Arial','weight' : 'bold'}, size = 12) 52 | plt.ylabel('Deletion Ratio (%)', fontdict = {'family': 'Arial'}, size = 15) 53 | self.setLayout(self.layout) 54 | self.show() 55 | 56 | def showDialog(self,sample,reg,regPAM): 57 | self.color = QColorDialog.getColor() 58 | if self.color.isValid(): 59 | self.ui = Changecolor() 60 | self.ui.deletion_ratio(sample,reg,regPAM,self.color) 61 | self.ui.show() 62 | ####################################################################### 63 | 64 | ############deletion group ratio bar################################### 65 | def deletion_group_ratio(self,groupname,regmean,stdrr,glabels,regPAM,regck,y_ck,ckname): 66 | print(groupname) 67 | 68 | self.button = QPushButton('Color', self) 69 | 70 | self.button.move(20, 20) 71 | 72 | self.figure = plt.figure(figsize=(16, 6)) 73 | 74 | self.canvas = FigureCanvas(self.figure) 75 | self.toolbar = NavigationToolbar(self.canvas, self) 76 | 77 | self.layout = QtWidgets.QVBoxLayout() 78 | self.layout.addWidget(self.toolbar) 79 | self.layout.addWidget(self.canvas) 80 | self.layout.addWidget(self.button) 81 | 82 | self.button.clicked.connect(lambda: self.changec_group(groupname,regmean,stdrr,glabels,regPAM,regck,y_ck,ckname)) 83 | 84 | self.ax0 = self.figure.add_subplot(1,2,1) 85 | y = regmean 86 | y_std = stdrr 87 | self.ax0.bar(regmean.index, y, color='purple') 88 | # add errorbar, elinewidth:errorbar line with; capsize/capthick:上下横线长短/粗细,ls:linestyle='None'去掉连接线。 ecolor: errorbar line color 89 | self.ax0.errorbar(regmean.index, y, yerr=y_std, fmt='', elinewidth=0.5, capsize=2, capthick=0.5, ls='None', 90 | ecolor='black') 91 | self.ax0.set_title(groupname,fontdict = {'family': 'Times New Roman'}, size = 15) 92 | 93 | self.ax0.set_xticks(regmean.index, minor=True) 94 | self.ax0.set_xticklabels(glabels, color="black", minor=True, fontdict = {'family': 'Arial','weight' : 'bold'}, size = 12) # minor=True表示次坐标轴 95 | self.ax0.set_xticks(regPAM.index) 96 | self.ax0.set_xticklabels(regPAM.label, color="red", fontdict = {'family': 'Arial','weight' : 'bold'}, size = 12) 97 | 98 | self.ax0.set_ylabel('Deletion Ratio (%)', fontdict={'family': 'Times New Roman'}, size=15) 99 | 100 | self.ax1 = self.figure.add_subplot(1,2,2) 101 | v=self.ax0.axis() ##返回子图1的坐标范围 102 | self.ax1.axis(v) ##设置子图2的坐标范围 103 | 104 | self.ax1.bar(regck.index, y_ck, color='grey') 105 | self.ax1.set_title(ckname,fontdict = {'family': 'Times New Roman'}, size = 15) 106 | self.ax1.set_xticks(regck.index,minor=True) 107 | self.ax1.set_xticklabels(glabels, color="black", minor=True, fontdict = {'family': 'Arial','weight' : 'bold'}, size = 12) 108 | self.ax1.set_xticks(regPAM.index) 109 | self.ax1.set_xticklabels(regPAM.label, color="red", fontdict = {'family': 'Arial','weight' : 'bold'}, size = 12) 110 | 111 | self.setLayout(self.layout) 112 | self.show() 113 | 114 | def changec_group(self,groupname,regmean,stdrr,glabels,regPAM,regck,y_ck,ckname): 115 | self.color = QColorDialog.getColor() 116 | if self.color.isValid(): 117 | self.ui = Changecolor() 118 | self.ui.deletion_group_ratio(groupname,regmean,stdrr,glabels,regPAM,regck,y_ck,ckname,self.color) 119 | self.ui.show() 120 | ####################################################################### 121 | 122 | ############deletion size bar################################### 123 | def deletion_size(self,groupname, x,sizereg): 124 | print(groupname) 125 | self.button = QPushButton('Color', self) 126 | 127 | self.button.move(20, 20) 128 | 129 | self.figure = plt.figure(figsize=(8, 6)) 130 | self.canvas = FigureCanvas(self.figure) 131 | self.toolbar = NavigationToolbar(self.canvas, self) 132 | 133 | self.layout = QtWidgets.QVBoxLayout() 134 | self.layout.addWidget(self.toolbar) 135 | self.layout.addWidget(self.canvas) 136 | self.layout.addWidget(self.button) 137 | 138 | self.button.clicked.connect(lambda: self.changec_size(groupname, x, sizereg)) 139 | 140 | 141 | self.ax = self.figure.add_subplot(111) 142 | ymajorFormatter = FormatStrFormatter('%1.1f') ## 设置坐标轴格式 143 | self.ax.yaxis.set_major_formatter(ymajorFormatter) 144 | self.ax.bar(x, sizereg.ratio_mean, color='red') 145 | # add errorbar, elinewidth:errorbar line with; capsize/capthick:上下横线长短/粗细,ls:linestyle='None'去掉连接线。 ecolor: errorbar line color 146 | self.ax.errorbar(x, sizereg.ratio_mean, yerr=sizereg.ratio_stdrr, fmt='', elinewidth=0.5, capsize=2, capthick=0.5, ls='None',ecolor='black') 147 | self.ax.set_title(groupname, size=15, fontdict={'family': 'Times New Roman'}) 148 | self.ax.set_ylabel('Deletion Size (%)', size=15, fontdict={'family': 'Times New Roman'}) 149 | self.ax.set_xticks(x) 150 | self.ax.set_xticklabels(sizereg.Index, rotation=35, fontdict={'family': 'Arial'}, size=12) 151 | 152 | self.setLayout(self.layout) 153 | self.show() 154 | 155 | def changec_size(self,groupname, x,sizereg): 156 | self.color = QColorDialog.getColor() 157 | if self.color.isValid(): 158 | self.ui = Changecolor() 159 | self.ui.deletion_size(groupname, x,sizereg, self.color) 160 | self.ui.show() 161 | 162 | ####################################################################### 163 | 164 | if __name__ == '__main__': 165 | app = QtWidgets.QApplication(sys.argv) 166 | main = Window() 167 | main.setWindowTitle('Bar plot') 168 | main.show() 169 | sys.exit(app.exec_()) -------------------------------------------------------------------------------- /CMlib/start.bak.ui: -------------------------------------------------------------------------------- 1 | 2 | 3 | mainWindow 4 | 5 | 6 | 7 | 0 8 | 0 9 | 800 10 | 600 11 | 12 | 13 | 14 | MainWindow 15 | 16 | 17 | 18 | 19 | 20 | 290 21 | 10 22 | 231 23 | 51 24 | 25 | 26 | 27 | 28 | Arial 29 | 24 30 | 75 31 | true 32 | 33 | 34 | 35 | CRISPRMatch Start 36 | 37 | 38 | Qt::AlignCenter 39 | 40 | 41 | 42 | 43 | 44 | 60 45 | 410 46 | 231 47 | 51 48 | 49 | 50 | 51 | 52 | Arial 53 | 15 54 | 75 55 | true 56 | 57 | 58 | 59 | 5. Processing info 60 | 61 | 62 | 63 | 64 | 65 | 100 66 | 458 67 | 471 68 | 101 69 | 70 | 71 | 72 | 73 | 74 | 75 | 620 76 | 490 77 | 113 78 | 32 79 | 80 | 81 | 82 | 83 | Arial 84 | 20 85 | 86 | 87 | 88 | background-color: rgb(189, 53, 35); 89 | 90 | 91 | Start 92 | 93 | 94 | 95 | 96 | 97 | 340 98 | 120 99 | 231 100 | 211 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | Load 110 | 111 | 112 | 113 | 114 | 115 | 116 | Show 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | Load 128 | 129 | 130 | 131 | 132 | 133 | 134 | Show 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | Load 146 | 147 | 148 | 149 | 150 | 151 | 152 | Show 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 100 164 | 370 165 | 202 166 | 24 167 | 168 | 169 | 170 | 171 | 172 | 173 | Input 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | ... 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 360 193 | 370 194 | 214 195 | 24 196 | 197 | 198 | 199 | 200 | 201 | 202 | Output 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | ... 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 60 222 | 100 223 | 231 224 | 261 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | Arial 233 | 15 234 | 75 235 | true 236 | 237 | 238 | 239 | 1. Load Sample Information 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | Arial 248 | 15 249 | 75 250 | true 251 | 252 | 253 | 254 | 2. Load Gene Sequence 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | Arial 263 | 15 264 | 75 265 | true 266 | 267 | 268 | 269 | 3. Load Group Information 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | Arial 278 | 15 279 | 75 280 | true 281 | 282 | 283 | 284 | 4. Input and Output Directory 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | -------------------------------------------------------------------------------- /CMlib/split_lanes.ui: -------------------------------------------------------------------------------- 1 | 2 | 3 | Dialog 4 | 5 | 6 | 7 | 0 8 | 0 9 | 485 10 | 403 11 | 12 | 13 | 14 | 15 | 0 16 | 0 17 | 18 | 19 | 20 | Dialog 21 | 22 | 23 | 24 | 25 | 12 26 | 12 27 | 471 28 | 21 29 | 30 | 31 | 32 | 33 | 0 34 | 0 35 | 36 | 37 | 38 | 39 | Arial 40 | 18 41 | 75 42 | true 43 | 44 | 45 | 46 | Sample Split 47 | 48 | 49 | Qt::AlignCenter 50 | 51 | 52 | 53 | 54 | 55 | 12 56 | 41 57 | 461 58 | 281 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | Arial 69 | 13 70 | 75 71 | true 72 | 73 | 74 | 75 | 1. Load merged fastq 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 14 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | ... 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | Arial 111 | 13 112 | 75 113 | true 114 | 115 | 116 | 117 | 2. Load barcode table 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 14 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | ... 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | Arial 150 | 13 151 | 75 152 | true 153 | 154 | 155 | 156 | 3. Output directory 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 14 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | ... 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 30 188 | 350 189 | 421 190 | 33 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 0 199 | 0 200 | 201 | 202 | 203 | 204 | Arial 205 | 15 206 | 207 | 208 | 209 | background-color: rgb(255, 255, 255); 210 | color: rgb(25, 25, 25); 211 | 212 | 213 | Barcodes 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 0 222 | 0 223 | 224 | 225 | 226 | 227 | Arial 228 | 15 229 | 230 | 231 | 232 | background-color: rgb(230, 230, 230); 233 | color: rgb(252, 1, 7); 234 | 235 | 236 | Split Now! 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 0 245 | 0 246 | 247 | 248 | 249 | 250 | Arial 251 | 15 252 | 253 | 254 | 255 | background-color: rgb(255, 255, 255); 256 | 257 | 258 | Reset 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | resetbtn 269 | clicked() 270 | fastqline 271 | clear() 272 | 273 | 274 | 373 275 | 352 276 | 277 | 278 | 359 279 | 105 280 | 281 | 282 | 283 | 284 | resetbtn 285 | clicked() 286 | barcodeline 287 | clear() 288 | 289 | 290 | 384 291 | 358 292 | 293 | 294 | 385 295 | 205 296 | 297 | 298 | 299 | 300 | resetbtn 301 | clicked() 302 | outputline 303 | clear() 304 | 305 | 306 | 307 307 | 364 308 | 309 | 310 | 308 311 | 301 312 | 313 | 314 | 315 | 316 | 317 | -------------------------------------------------------------------------------- /CMlib/start.ui: -------------------------------------------------------------------------------- 1 | 2 | 3 | mainWindow 4 | 5 | 6 | 7 | 0 8 | 0 9 | 600 10 | 500 11 | 12 | 13 | 14 | MainWindow 15 | 16 | 17 | 18 | 19 | 20 | 0 21 | 0 22 | 601 23 | 51 24 | 25 | 26 | 27 | 28 | 0 29 | 0 30 | 31 | 32 | 33 | 34 | Arial 35 | 18 36 | 75 37 | true 38 | 39 | 40 | 41 | CRISPRMatch Start 42 | 43 | 44 | Qt::AlignCenter 45 | 46 | 47 | 48 | 49 | 50 | 20 51 | 320 52 | 281 53 | 51 54 | 55 | 56 | 57 | 58 | Arial 59 | 13 60 | 75 61 | true 62 | 63 | 64 | 65 | 5. Processing info 66 | 67 | 68 | 69 | 70 | 71 | 20 72 | 360 73 | 411 74 | 111 75 | 76 | 77 | 78 | 79 | 80 | 81 | 460 82 | 390 83 | 113 84 | 32 85 | 86 | 87 | 88 | 89 | Arial 90 | 18 91 | 92 | 93 | 94 | background-color: rgb(189, 53, 35); 95 | 96 | 97 | Start 98 | 99 | 100 | 101 | 102 | 103 | 300 104 | 60 105 | 231 106 | 161 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | Load 116 | 117 | 118 | 119 | 120 | 121 | 122 | Show 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | Load 134 | 135 | 136 | 137 | 138 | 139 | 140 | Show 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | Load 152 | 153 | 154 | 155 | 156 | 157 | 158 | Show 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 40 170 | 290 171 | 202 172 | 24 173 | 174 | 175 | 176 | 177 | 178 | 179 | Input 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | ... 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 310 199 | 290 200 | 214 201 | 24 202 | 203 | 204 | 205 | 206 | 207 | 208 | Output 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | ... 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 20 228 | 60 229 | 281 230 | 211 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | Arial 239 | 13 240 | 75 241 | true 242 | 243 | 244 | 245 | 1. Load Sample Information 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | Arial 254 | 13 255 | 75 256 | true 257 | 258 | 259 | 260 | 2. Load Gene Sequence 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | Arial 269 | 13 270 | 75 271 | true 272 | 273 | 274 | 275 | 3. Load Group Information 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | Arial 284 | 13 285 | 75 286 | true 287 | 288 | 289 | 290 | 4. Input and Output Directory 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 460 300 | 440 301 | 113 302 | 32 303 | 304 | 305 | 306 | 307 | Arial 308 | 18 309 | 310 | 311 | 312 | 313 | 314 | 315 | Result 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | -------------------------------------------------------------------------------- /CMlib/bwa.py: -------------------------------------------------------------------------------- 1 | import os 2 | from subprocess import Popen 3 | from subprocess import PIPE 4 | import re 5 | import shutil 6 | from CMlib import subprocesspath 7 | import time 8 | import signal 9 | 10 | 11 | def testbwa(bwabin): 12 | """ 13 | 14 | :param bwabin: bwa bin path 15 | :return: bool, True: bwa tested ok. False: bwa error 16 | """ 17 | 18 | bwacmd = [bwabin] 19 | 20 | bwarun = Popen(bwacmd, stdout=PIPE, stderr=PIPE, shell=True) 21 | 22 | # bwarun.communicate() 23 | 24 | testres = False 25 | 26 | pat = re.compile('Version') 27 | 28 | for i in bwarun.stderr.readlines(): 29 | 30 | i = i.decode('utf-8').rstrip('\n') 31 | 32 | if re.search(pat, i): 33 | 34 | testres = True 35 | 36 | bwarun.communicate() 37 | 38 | return testres 39 | 40 | 41 | def bwaversion(bwabin): 42 | """ 43 | 44 | :param bwabin: bwa bin path 45 | :return: string, version of bwa 46 | """ 47 | 48 | bwacmd = [bwabin] 49 | 50 | bwarun = Popen(bwacmd, stdout=PIPE, stderr=PIPE) 51 | 52 | pat = re.compile('Version') 53 | 54 | version = 'None' 55 | 56 | for i in bwarun.stderr.readlines(): 57 | 58 | i = i.decode('utf-8').rstrip('\n') 59 | 60 | if re.search(pat, i): 61 | 62 | (_, version) = i.split(' ') 63 | 64 | bwarun.communicate() 65 | 66 | return version 67 | 68 | def bwaindex(bwabin, reffile, samplefolder): 69 | """ 70 | bwa index 71 | :param bwabin: bwa bin path 72 | :param reffile: reference genome file 73 | :param samplefolder: sample dir 74 | :return: no retrun 75 | """ 76 | 77 | refbasename = os.path.basename(os.path.abspath(reffile)) 78 | 79 | dscopy = os.path.join(samplefolder, refbasename) 80 | 81 | shutil.copyfile(os.path.abspath(reffile), dscopy) 82 | 83 | # refinsample = os.path.join(samplefolder, refbasename) 84 | bwabin = os.path.abspath(bwabin) 85 | 86 | bwacmd = [bwabin, 'index', refbasename] 87 | # print(bwacmd) 88 | runbwaindex = Popen(bwacmd, cwd=samplefolder) 89 | 90 | runbwaindex.communicate() 91 | 92 | 93 | 94 | 95 | def bwaalign(bwabin, reffile, inputfile, outfile, threadnumber=1): 96 | """ 97 | bwa mem alignment 98 | :param bwabin: bwa bin path 99 | :param reffile: reference file, make by bwa index 100 | :param inputfile: sequence or reads file 101 | :param outfile: samfile 102 | :param threadnumber: number of threads 103 | :return: True 104 | """ 105 | 106 | # bwabin = subprocesspath.subprocesspath(bwabin) 107 | 108 | ##/Users/Forrest/SVN/bwa/bwa mem -O 0 -B 0 -E 0 -k 5 ../DM_404.fa oligo_tmp2.fa 109 | bwabin = subprocesspath.subprocesspath(bwabin) 110 | reffile = subprocesspath.subprocesspath(reffile) 111 | inputfile = subprocesspath.subprocesspath(inputfile) 112 | outfile = subprocesspath.subprocesspath(outfile) 113 | 114 | bwacmd = ' '.join([bwabin, 'mem', '-O',' 0',' -B',' 0',' -E',' 0',' -k',' 5', '-t',str(threadnumber), reffile, inputfile, '>', outfile]) 115 | 116 | print(bwacmd) 117 | 118 | runbwaalign = Popen(bwacmd, shell=True) 119 | 120 | runbwaalign.communicate() 121 | 122 | return True 123 | 124 | def samfilter(samfile, minas, maxxs): 125 | """ 126 | 127 | :param samfile: samfile 128 | :param minas: min AS:i score, suggest probe length 129 | :param maxxs: max XS:i score, suggest probe length * homology 130 | :return: list, list of probe/sequence 131 | """ 132 | seqlist = list() 133 | 134 | pat = re.compile('^@') 135 | 136 | inio = open(samfile,'r') 137 | 138 | aspat = re.compile('AS:i:(\d.)') 139 | 140 | xspat = re.compile('XS:i:(\d.)') 141 | 142 | for i in inio.readlines(): 143 | 144 | i = i.rstrip('\n') 145 | 146 | if not re.search(pat, i): 147 | 148 | asmatch = re.search(aspat, i) 149 | 150 | xsmatch = re.search(xspat, i) 151 | 152 | if asmatch: 153 | 154 | asscore = int(asmatch.group(1)) 155 | 156 | else: 157 | 158 | continue 159 | 160 | if xsmatch: 161 | 162 | xsscore = int(xsmatch.group(1)) 163 | 164 | else: 165 | 166 | continue 167 | 168 | if (asscore >= minas) & (xsscore < maxxs): 169 | 170 | mapinfo = i.split('\t') 171 | 172 | seqlist.append(mapinfo[9]) 173 | 174 | return seqlist 175 | 176 | 177 | def stop_bwa(p=None): 178 | 179 | """ 180 | kill all jellyfish process 181 | :param p: pid of bwa 182 | :return: no return 183 | """ 184 | 185 | if p is not None: 186 | 187 | os.kill(p.pid, signal.SIGTERM) 188 | 189 | time.sleep(5) 190 | 191 | else: 192 | 193 | pids = [] 194 | p = Popen('ps -A', shell=True, stdout=PIPE) 195 | 196 | lines = p.stdout.readlines() 197 | 198 | for line in lines: 199 | 200 | if b'bwa' in line: 201 | 202 | pids.append(int(line.split()[0])) 203 | 204 | for pid in pids: 205 | 206 | os.kill(pid,signal.SIGTERM) 207 | 208 | time.sleep(10) 209 | 210 | 211 | def bwaloci(bwabin, reffile, inputfile, threadnumber=1): 212 | 213 | pat = re.compile('^@') 214 | 215 | bwabin = subprocesspath.subprocesspath(bwabin) 216 | reffile = subprocesspath.subprocesspath(reffile) 217 | inputfile = subprocesspath.subprocesspath(inputfile) 218 | 219 | 220 | bwacmd = ' '.join([bwabin, 'mem', '-O',' 0',' -B',' 0',' -E',' 0',' -k',' 5', '-t',str(threadnumber), reffile, inputfile]) 221 | 222 | print(bwacmd) 223 | 224 | runbwaalign = Popen(bwacmd, shell=True, stdout=PIPE) 225 | 226 | res = list() 227 | 228 | for lin in runbwaalign.stdout.readlines(): 229 | 230 | lin = lin.decode('utf-8').rstrip('\n') 231 | 232 | if not re.search(pat, lin): 233 | 234 | infor = lin.split('\t') 235 | 236 | seqnmae = infor[2] 237 | 238 | start = infor[3] 239 | 240 | probeseq = infor[9] 241 | 242 | res.append('\t'.join([probeseq, seqnmae, start])) 243 | 244 | return res 245 | 246 | 247 | def bwafilter(bwabin, reffile, inputfile, minas, maxxs ,threadnumber=1 ): 248 | 249 | pat = re.compile('^@') 250 | 251 | bwabin = subprocesspath.subprocesspath(bwabin) 252 | 253 | reffile = subprocesspath.subprocesspath(reffile) 254 | 255 | inputfile = subprocesspath.subprocesspath(inputfile) 256 | 257 | bwacmd = ' '.join([bwabin, 'mem', '-O',' 0',' -B',' 0',' -E',' 0',' -k',' 5', '-t',str(threadnumber), reffile, inputfile]) 258 | 259 | print(bwacmd) 260 | 261 | aspat = re.compile('AS:i:(\d.)') 262 | 263 | xspat = re.compile('XS:i:(\d.)') 264 | 265 | runbwaalign = Popen(bwacmd, shell=True, stdout=PIPE) 266 | 267 | res = list() 268 | 269 | for lin in runbwaalign.stdout.readlines(): 270 | # print("before decode",lin) 271 | lin = lin.decode('utf-8').rstrip('\n') 272 | # print("after decode", lin) 273 | if not re.search(pat, lin): 274 | 275 | infor = lin.split('\t') 276 | 277 | seqnmae = infor[2] 278 | 279 | start = infor[3] 280 | 281 | probeseq = infor[9] 282 | 283 | asmatch = re.search(aspat, lin) 284 | 285 | xsmatch = re.search(xspat, lin) 286 | 287 | if asmatch: 288 | 289 | asscore = int(asmatch.group(1)) 290 | 291 | else: 292 | 293 | continue 294 | 295 | if xsmatch: 296 | 297 | xsscore = int(xsmatch.group(1)) 298 | 299 | else: 300 | 301 | continue 302 | 303 | if (asscore >= minas) & (xsscore < maxxs): 304 | 305 | res.append('\t'.join([probeseq, seqnmae, start])) 306 | 307 | 308 | runbwaalign.stdout.close() 309 | 310 | runbwaalign.wait() 311 | 312 | return res 313 | 314 | 315 | # runbwaalign.communicate() 316 | 317 | def bwareflength(bwabin, reffile): 318 | 319 | pat = re.compile('@SQ') 320 | 321 | bwabin = subprocesspath.subprocesspath(bwabin) 322 | 323 | reffile = subprocesspath.subprocesspath(reffile) 324 | 325 | bwacmd = ' '.join([bwabin, 'mem', reffile, '-']) 326 | 327 | runbwaalign = Popen(bwacmd, shell=True, stdout=PIPE, stdin=PIPE) 328 | 329 | runbwaalign.stdin.write('AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'.encode('ascii')) 330 | 331 | runbwaalign.stdin.close() 332 | 333 | seqlength = dict() 334 | 335 | for i in runbwaalign.stdout: 336 | 337 | i = i.decode("utf-8") 338 | 339 | i = i.rstrip('\n') 340 | 341 | if re.search(pat, i): 342 | 343 | (_, seqname, seqlen) = i.split('\t') 344 | 345 | seqname = str(seqname.replace('SN:', '')) 346 | 347 | seqlen = int(seqlen.replace('LN:', '')) 348 | 349 | seqlength[seqname] = seqlen 350 | 351 | return seqlength 352 | 353 | #echo 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA' | bwa mem Zea_mays.AGPv3.23.dna.genome.fa - 354 | 355 | if __name__ == '__main__': 356 | 357 | 358 | bwapath = '../bin/bwa/x86_64-Darwin/bwa' 359 | 360 | seqlength = bwareflength(bwapath, '../Test/DM_404.fa') 361 | 362 | print(seqlength) 363 | 364 | # bwaalign(bwapath, '../Test/DM_404.fa', '../Test/Testsampe/oligo_tmp2.fa', '../Test/Testsampe/outfile.sam',4) 365 | # bwaindex(bwapath, '../Test/DM_404.fa', '../Test/Testsampe/') 366 | 367 | # bwapath = subprocesspath.subprocesspath(bwapath) 368 | # 369 | 370 | # seqlist = samfilter('../Test/Testsampe/outfile.sam', minas=45, maxxs=33) 371 | # 372 | # for i in seqlist: 373 | # 374 | # print(i) 375 | 376 | # tester = bwaversion(bwapath) 377 | # 378 | # print(tester) 379 | # 380 | # res = bwaloci(bwapath, '../Test/Testsampe/DM_404.fa', '../Test/Testsampe/DM_test.faprobes.fa',threadnumber=4) 381 | # 382 | # for i in res: 383 | # print(i) -------------------------------------------------------------------------------- /CMlib/flash_merge.ui: -------------------------------------------------------------------------------- 1 | 2 | 3 | Dialog 4 | 5 | 6 | 7 | 0 8 | 0 9 | 659 10 | 236 11 | 12 | 13 | 14 | 15 | 0 16 | 0 17 | 18 | 19 | 20 | Dialog 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 0 30 | 0 31 | 32 | 33 | 34 | 35 | Arial 36 | 15 37 | 75 38 | true 39 | 40 | 41 | 42 | 1. Load left fastq 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 0 51 | 0 52 | 53 | 54 | 55 | 56 | Arial 57 | 15 58 | 75 59 | true 60 | 61 | 62 | 63 | 2. Load right fastq 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 0 72 | 0 73 | 74 | 75 | 76 | 77 | Arial 78 | 15 79 | 75 80 | true 81 | 82 | 83 | 84 | 3. Set Output Directory 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 0 93 | 0 94 | 95 | 96 | 97 | 98 | Arial 99 | 15 100 | 75 101 | true 102 | 103 | 104 | 105 | 4. Set Output Name 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 0 120 | 0 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 0 130 | 0 131 | 132 | 133 | 134 | ... 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 0 147 | 0 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 0 157 | 0 158 | 159 | 160 | 161 | ... 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 0 174 | 0 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 0 184 | 0 185 | 186 | 187 | 188 | ... 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 0 201 | 0 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 0 211 | 0 212 | 213 | 214 | 215 | 216 | Arial 217 | 15 218 | 75 219 | true 220 | 221 | 222 | 223 | 5. CPUs 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 0 232 | 0 233 | 234 | 235 | 236 | 1 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | Arial 249 | 15 250 | 251 | 252 | 253 | background-color: rgb(154, 25, 6); 254 | 255 | 256 | Merge 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 0 265 | 0 266 | 267 | 268 | 269 | 270 | Arial 271 | 24 272 | 75 273 | true 274 | 275 | 276 | 277 | Merge Sequence 278 | 279 | 280 | Qt::AlignCenter 281 | 282 | 283 | 284 | 285 | labelname 286 | label 287 | label_2 288 | left 289 | right 290 | leftbtn 291 | rightbtn 292 | label_3 293 | label_4 294 | output 295 | outputbtn 296 | name 297 | pushButton 298 | left 299 | name 300 | label_5 301 | label_4 302 | 303 | 304 | 305 | 306 | -------------------------------------------------------------------------------- /CMlib/plot_each_bam_filter.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import pysam 3 | from pyfasta import Fasta 4 | import matplotlib 5 | from scipy import stats 6 | import re 7 | import matplotlib.pyplot as plt 8 | import pandas as pd 9 | import numpy as np 10 | from PyQt5 import QtWidgets 11 | 12 | 13 | 14 | def caldel(samfilename, start, end, genename, filter): 15 | """ 16 | 17 | :param samfilename: path and name of each bam file 18 | :param start: setting the start site of deletion calculation 19 | :param end: setting the end site of deletion calculation 20 | :param genename: the name of genome-editing target region 21 | :return: 22 | """ 23 | n = 0 24 | 25 | mutateinfor = dict() 26 | 27 | deletelent = dict() 28 | samfile = pysam.AlignmentFile(samfilename, 'r') 29 | for read in samfile.fetch(genename): 30 | 31 | # print(read.cigartuples, read.cigarstring, read.reference_start, read.cigartuples[0][1], read.cigartuples[0][1]+read.reference_start) 32 | 33 | 34 | nowsite = read.reference_start 35 | # print(read.cigarstring) 36 | for cigarnow in read.cigartuples: 37 | # print(cigarnow) 38 | cigartype = cigarnow[0] 39 | # print(cigartype) 40 | cigarlenght = cigarnow[1] 41 | 42 | cigarend = nowsite + cigarlenght 43 | 44 | if start < nowsite < end: 45 | 46 | if cigartype == 2: 47 | 48 | if cigarlenght < (end - start): 49 | 50 | if cigarlenght in deletelent: 51 | 52 | deletelent[cigarlenght] += 1 53 | else: 54 | deletelent[cigarlenght] = 1 55 | 56 | for i in range(nowsite, cigarend): 57 | 58 | if i in mutateinfor: 59 | 60 | if cigartype in mutateinfor[i]: 61 | 62 | mutateinfor[i][cigartype] += 1 63 | 64 | else: 65 | 66 | mutateinfor[i][cigartype] = 1 67 | 68 | else: 69 | 70 | mutateinfor[i] = dict() 71 | 72 | mutateinfor[i][cigartype] = 1 73 | 74 | nowsite += cigarlenght 75 | 76 | n += 1 77 | 78 | mutateinforpd = pd.DataFrame.from_dict(mutateinfor, orient='index').fillna(value=0) 79 | mutateinforpd['sum'] = mutateinforpd.sum(axis=1) 80 | # print(mutateinforpd[2],mutateinforpd['sum']) 81 | mutateinforpd['delrate'] = mutateinforpd[2]/(mutateinforpd['sum']-filter) * 100 82 | deletelentpd = pd.DataFrame.from_dict(deletelent, orient='index') 83 | 84 | return (mutateinforpd, deletelentpd) 85 | 86 | def barchart_filter(infofile,groupinfo,refname, output, bamdir): 87 | """ 88 | 89 | :param infofile: a description file of details of each sample, example: sample_infor.txt 90 | :param groupinfo: a description file of details of each group, example: group_infor.txt 91 | :param refname: a fasta format of the sequence in the target region, exaple:Samples_gene.fa 92 | :param output: folder of final result 93 | :param bamdir: folder of temporary files 94 | :return: 95 | """ 96 | datainfo = pd.read_csv(infofile, index_col="Index") 97 | groupinfor = pd.read_csv(groupinfo) 98 | filterfile = os.path.join(output, 'filter_wt_reads_number.txt') 99 | filterinfor= pd.read_table(filterfile) 100 | stranddict = dict() 101 | filter_dict = dict() 102 | for idz in filterinfor.index: 103 | filter_dict[filterinfor.loc[idz]['Sample']] = filterinfor.loc[idz]['filter'] 104 | 105 | for idy in groupinfor.index: 106 | stranddict[groupinfor.loc[idy].rep1] = groupinfor.loc[idy].strand 107 | stranddict[groupinfor.loc[idy].rep2] = groupinfor.loc[idy].strand 108 | stranddict[groupinfor.loc[idy].rep3] = groupinfor.loc[idy].strand 109 | stranddict[groupinfor.loc[idy].control] = groupinfor.loc[idy].strand 110 | fa = Fasta(refname) 111 | for idx in datainfo.index: 112 | note = datainfo.loc[idx].Note 113 | 114 | if note not in stranddict: 115 | error = ' '.join([note, 'is not involved in group table! Please Check!']) 116 | showwarnings("Error", error) 117 | continue 118 | 119 | strand = stranddict[note] 120 | type = '' 121 | if (re.search("gRNA", datainfo.loc[idx].Note)): 122 | if strand == '+': 123 | start = datainfo.loc[idx]['start'] - 10 124 | end = datainfo.loc[idx]['end'] + 10 125 | type = "gf" 126 | else: 127 | start = datainfo.loc[idx]['start'] - 10 128 | end = datainfo.loc[idx]['end'] + 10 129 | type = "gr" 130 | elif (re.search("crRNA", datainfo.loc[idx].Note)): 131 | if strand == '+': 132 | start = datainfo.loc[idx]['start'] - 10 133 | end = datainfo.loc[idx]['end'] + 30 134 | type = "cf" 135 | else: 136 | start = datainfo.loc[idx]['start'] - 30 137 | end = datainfo.loc[idx]['end'] + 10 138 | type = "cr" 139 | # if (re.search("gRNA", datainfo.loc[idx].Note)): 140 | # start = datainfo.loc[idx].start - 10 141 | # end = datainfo.loc[idx].end + 10 142 | # elif (re.search("crRNA", datainfo.loc[idx].Note)): 143 | # start = datainfo.loc[idx].start 144 | # end = datainfo.loc[idx].end + 30 145 | #print(start, end) 146 | bamfile = os.path.join(bamdir, note + '.bam') 147 | pdffile = os.path.join(output, note + '.pdf') 148 | graphcsv= os.path.join(bamdir,note + '.graph.csv') 149 | pamcsv = os.path.join(bamdir, note + '.pam.csv') 150 | 151 | 152 | #print(bamfile) 153 | genename = datainfo.loc[idx].gene_name 154 | seq = fa[genename][start - 1:end].upper() 155 | if strand == '-': 156 | seq=DNA_reverse(DNA_complement(seq)) 157 | seqlist = list() 158 | seqlistPAM = list() 159 | seqlistother = list() 160 | 161 | for nt in seq: 162 | seqlist.append(nt) 163 | 164 | filter_read = filter_dict[datainfo.loc[idx].Note] 165 | (mutateinforpd, deletelentpd) = caldel(samfilename=bamfile, start=start, end=end, genename=genename, filter=filter_read) 166 | 167 | #print(mutateinforpd) 168 | #print(filter_read) 169 | reg = mutateinforpd.loc[start:end] 170 | regPAM = list() 171 | regother = list() 172 | 173 | if type == 'gf': 174 | seqlistPAM = seqlist[-13:-10] 175 | seqlistother = seqlist 176 | seqlistother[-13:-10] = ['', '', ''] 177 | regPAM = reg[-13:-10] 178 | if type == 'gr': 179 | seqlistPAM = seqlist[-13:-10] 180 | seqlistother = seqlist 181 | seqlistother[-13:-10] = ['', '', ''] 182 | regPAM = reg[-13:-10] 183 | # if type == 'gr': 184 | # seqlistPAM = seqlist[10:13] 185 | # seqlistother = seqlist 186 | # seqlistother[10:13] = ['', '', ''] 187 | # regPAM = reg[10:13] 188 | lenth = end - start 189 | if (type == 'cf' or type == 'cr') and lenth == 65: 190 | seqlistPAM = seqlist[10:13] 191 | seqlistother = seqlist 192 | seqlistother[10:13] = ['', '', ''] 193 | regPAM = reg[10:13] 194 | if (type == 'cf' or type == 'cr') and lenth == 66: 195 | seqlistPAM = seqlist[10:14] 196 | seqlistother = seqlist 197 | seqlistother[10:14] = ['', '', '', ''] 198 | regPAM = reg[10:14] 199 | # if type == 'cf': 200 | # seqlistPAM = seqlist[0:4] 201 | # seqlistother = seqlist 202 | # seqlistother[0:4] = ['', '', '', ''] 203 | # regPAM = reg[0:4] 204 | # if type == 'cr': 205 | # seqlistPAM = seqlist[0:4] 206 | # seqlistother = seqlist 207 | # seqlistother[0:4] = ['', '', '', ''] 208 | # regPAM = reg[0:4] 209 | # if type == 'cr': 210 | # seqlistPAM = seqlist[-4:] 211 | # seqlistother = seqlist 212 | # seqlistother[-4:] = ['', '', '', ''] 213 | # regPAM = reg[-4:] 214 | #print(reg) 215 | fig, ax = plt.subplots() 216 | y=reg.delrate 217 | if strand == '-': 218 | y=y[::-1] 219 | ax.bar(reg.index, y, color='blue') 220 | ax.set_title(note) 221 | ax.set_xticks(reg.index, minor=True) 222 | ax.set_xticklabels(seqlistother, color="black", minor=True, fontdict = {'family': 'Arial'}, size = 5) # minor=True表示次坐标轴 223 | ax.set_xticks(regPAM.index) 224 | ax.set_xticklabels(seqlistPAM, color="red", fontdict = {'family': 'Arial'}, size = 5) 225 | # ax.set_xticks(reg.index) 226 | # ax.set_xticklabels(seqlist) 227 | # plt.show() 228 | plt.savefig(pdffile, dpi=300, format="pdf") 229 | plt.close(fig) 230 | print(pdffile, "done!") 231 | 232 | 233 | ####output cvs in tmpfile 234 | ratio_final=list(y) #直接饮用y不会按照方向,必须调整成list才可以 235 | reg['label'] = seqlistother ##合并X横坐标到reg 框架中 236 | reg['ratio'] = ratio_final 237 | reg.to_csv(graphcsv,index=True, index_label="Index") 238 | regPAM['label'] = seqlistPAM 239 | regPAM.to_csv(pamcsv,index=True, index_label="Index") 240 | # print('seqlistother =',seqlistother,sep=' ', file=graphfile) 241 | # print('regPAM =', list(regPAM.index), sep=' ', file=graphfile) 242 | # print('seqlistPAM =', seqlistPAM, sep=' ', file=graphfile) 243 | # print('strand =',strand, sep=' ',file=graphfile) 244 | # graphfile.close() 245 | 246 | 247 | 248 | 249 | def DNA_complement(sequence): 250 | sequence = sequence.upper() 251 | sequence = sequence.replace('A', 't') 252 | sequence = sequence.replace('T', 'a') 253 | sequence = sequence.replace('C', 'g') 254 | sequence = sequence.replace('G', 'c') 255 | return sequence.upper() 256 | 257 | 258 | def DNA_reverse(sequence): 259 | sequence = sequence.upper() 260 | return sequence[::-1] 261 | 262 | 263 | # ############## warning message ######### 264 | def showwarnings(title, message): 265 | wBox = QtWidgets.QMessageBox() 266 | wBox.setIcon(QtWidgets.QMessageBox.Warning) 267 | wBox.setWindowTitle(title) 268 | wBox.setText(message) 269 | wBox.setStandardButtons(QtWidgets.QMessageBox.Ok) 270 | wBox.exec_() 271 | ################################################## -------------------------------------------------------------------------------- /CMlib/plot_pdf_filter.py: -------------------------------------------------------------------------------- 1 | import pysam 2 | from pyfasta import Fasta 3 | import matplotlib 4 | from scipy import stats 5 | from os import path 6 | import matplotlib.pyplot as plt 7 | import pandas as pd 8 | import numpy as np 9 | import re 10 | import os 11 | 12 | 13 | 14 | def caldel(samfilename, start, end, genename, filter): 15 | """ 16 | 17 | :param samfilename: path and name of each bam file 18 | :param start: setting the start site of deletion calculation 19 | :param end: setting the end site of deletion calculation 20 | :param genename: the name of genome-editing target region 21 | :return: 22 | """ 23 | n = 0 24 | 25 | mutateinfor = dict() 26 | 27 | deletelent = dict() 28 | samfile = pysam.AlignmentFile(samfilename, 'r') 29 | for read in samfile.fetch(genename): 30 | 31 | # print(read.cigartuples, read.cigarstring, read.reference_start, read.cigartuples[0][1], read.cigartuples[0][1]+read.reference_start) 32 | 33 | 34 | nowsite = read.reference_start 35 | # print(read.cigarstring) 36 | for cigarnow in read.cigartuples: 37 | # print(cigarnow) 38 | cigartype = cigarnow[0] 39 | # print(cigartype) 40 | cigarlenght = cigarnow[1] 41 | 42 | cigarend = nowsite + cigarlenght 43 | 44 | if start < nowsite < end: 45 | 46 | if cigartype == 2: 47 | 48 | if cigarlenght < (end - start): 49 | 50 | if cigarlenght in deletelent: 51 | 52 | deletelent[cigarlenght] += 1 53 | else: 54 | deletelent[cigarlenght] = 1 55 | 56 | for i in range(nowsite, cigarend): 57 | 58 | if i in mutateinfor: 59 | 60 | if cigartype in mutateinfor[i]: 61 | 62 | mutateinfor[i][cigartype] += 1 63 | 64 | else: 65 | 66 | mutateinfor[i][cigartype] = 1 67 | 68 | else: 69 | 70 | mutateinfor[i] = dict() 71 | 72 | mutateinfor[i][cigartype] = 1 73 | 74 | nowsite += cigarlenght 75 | 76 | n += 1 77 | 78 | mutateinforpd = pd.DataFrame.from_dict(mutateinfor, orient='index').fillna(value=0) 79 | mutateinforpd['sum'] = mutateinforpd.sum(axis=1) 80 | mutateinforpd['delrate'] = mutateinforpd[2]/(mutateinforpd['sum'] - filter) 81 | deletelentpd = pd.DataFrame.from_dict(deletelent, orient='index') 82 | 83 | return (mutateinforpd, deletelentpd) 84 | 85 | def plotpdf_filter(groupinfo, refname, output, bamdir): 86 | 87 | """ 88 | 89 | :param groupinfo: a description file of details of each group, example: group_infor.txt 90 | :param refname: a fasta format of the sequence in the target region, exaple:Samples_gene.fa 91 | :param output: folder of final result 92 | :param bamdir: folder of temporary files 93 | :return: 94 | """ 95 | 96 | groupinfor = pd.read_csv(groupinfo) 97 | #groupinfor = groupinfor.dropna(axis=0, how='any') 98 | groupinfor = groupinfor.fillna("UNKNOWN") 99 | filterfile = os.path.join(output, 'filter_wt_reads_number.txt') 100 | filterinfor = pd.read_table(filterfile) 101 | filter_dict = dict() 102 | for idz in filterinfor.index: 103 | filter_dict[filterinfor.loc[idz]['Sample']] = filterinfor.loc[idz]['filter'] 104 | fa = Fasta(refname) 105 | 106 | for idx in groupinfor.index: 107 | 108 | repbam1 = os.path.join(bamdir, groupinfor.loc[idx]['rep1'] + '.bam') 109 | repbam2 = os.path.join(bamdir, groupinfor.loc[idx]['rep2'] + '.bam') 110 | ckbam = os.path.join(bamdir, groupinfor.loc[idx]['control'] + '.bam') 111 | 112 | strand = groupinfor.loc[idx]['strand'] 113 | type = '' 114 | if (re.search("gRNA", groupinfor.loc[idx]['group'])): 115 | if strand == '+': 116 | start = groupinfor.loc[idx]['start'] - 10 117 | end = groupinfor.loc[idx]['end'] + 10 118 | type = 'gf' 119 | else: 120 | start = groupinfor.loc[idx]['start'] - 10 121 | end = groupinfor.loc[idx]['end'] + 10 122 | type = 'gr' 123 | 124 | elif (re.search("crRNA", groupinfor.loc[idx]['group'])): 125 | if strand == '+': 126 | start = groupinfor.loc[idx]['start'] - 10 127 | end = groupinfor.loc[idx]['end'] + 30 128 | type = 'cf' 129 | else: 130 | start = groupinfor.loc[idx]['start'] - 30 131 | end = groupinfor.loc[idx]['end'] + 10 132 | type = 'cr' 133 | genename = groupinfor.loc[idx]['gene'] 134 | namenow = groupinfor.loc[idx]['group'] 135 | 136 | 137 | #if (path.exists(repbam1) and path.exists(repbam2)) and path.exists(ckbam): 138 | if (path.exists(repbam1) and path.exists(repbam2)): 139 | 140 | #print(repbam1, repbam2, ckbam, start, end, genename, namenow) 141 | 142 | seq = fa[genename][start - 1:end].upper() 143 | if strand == '-': 144 | seq = DNA_reverse(DNA_complement(seq)) 145 | 146 | seqlist = list() 147 | 148 | for nt in seq: 149 | seqlist.append(nt) 150 | 151 | filter_read1 = filter_dict[groupinfor.loc[idx]['rep1']] 152 | filter_read2 = filter_dict[groupinfor.loc[idx]['rep2']] 153 | 154 | 155 | (mutateinforpd1, deletelentpd1) = caldel(samfilename=repbam1, start=start, end=end, genename=genename, filter=filter_read1) 156 | (mutateinforpd2, deletelentpd2) = caldel(samfilename=repbam2, start=start, end=end, genename=genename, filter=filter_read2) 157 | 158 | rep1 = mutateinforpd1.loc[start:end].delrate 159 | rep2 = mutateinforpd2.loc[start:end].delrate 160 | reg = pd.concat([rep1, rep2], axis=1) 161 | 162 | regmean = reg.mean(axis=1) 163 | stdrr = reg.sem(axis=1) 164 | 165 | 166 | 167 | seqlistPAM = list() 168 | seqlistother = list() 169 | regPAM = list() 170 | if type == 'gf': 171 | seqlistPAM = seqlist[-13:-10] 172 | seqlistother = seqlist 173 | seqlistother[-13:-10] = ['', '', ''] 174 | regPAM = regmean[-13:-10] 175 | if type == 'gr': 176 | seqlistPAM = seqlist[-13:-10] 177 | seqlistother = seqlist 178 | seqlistother[-13:-10] = ['', '', ''] 179 | regPAM = regmean[-13:-10] 180 | # if type == 'gr': 181 | # seqlistPAM = seqlist[10:13] 182 | # seqlistother = seqlist 183 | # seqlistother[10:13] = ['', '', ''] 184 | # regPAM = regmean[10:13] 185 | lenth = end - start 186 | if (type == 'cf' or type == 'cr') and lenth == 65: 187 | seqlistPAM = seqlist[10:13] 188 | seqlistother = seqlist 189 | seqlistother[10:13] = ['', '', ''] 190 | regPAM = reg[10:13] 191 | if (type == 'cf' or type == 'cr') and lenth == 66: 192 | seqlistPAM = seqlist[10:14] 193 | seqlistother = seqlist 194 | seqlistother[10:14] = ['', '', '', ''] 195 | regPAM = reg[10:14] 196 | # if type == 'cf': 197 | # seqlistPAM = seqlist[0:4] 198 | # seqlistother = seqlist 199 | # seqlistother[0:4] = ['', '', '', ''] 200 | # regPAM = regmean[0:4] 201 | # if type == 'cr': 202 | # seqlistPAM = seqlist[0:4] 203 | # seqlistother = seqlist 204 | # seqlistother[0:4] = ['', '', '', ''] 205 | # regPAM = regmean[0:4] 206 | # if type == 'cr': 207 | # seqlistPAM = seqlist[-4:] 208 | # seqlistother = seqlist 209 | # seqlistother[-4:] = ['', '', '', ''] 210 | # regPAM = regmean[-4:] 211 | 212 | 213 | pdfname = os.path.join(output, namenow + '.pdf') 214 | 215 | 216 | fig, (ax0, ax1) = plt.subplots(ncols=2, sharey=True, figsize=(16, 9)) 217 | # ax0.bar(regmean.index, regmean, yerr=stdrr) 218 | y = regmean 219 | if strand == '-': 220 | y=y[::-1] 221 | y_std = stdrr 222 | if strand == '-': 223 | y_std=y_std[::-1] 224 | ax0.bar(regmean.index, y, color='purple') 225 | # add errorbar, elinewidth:errorbar line with; capsize/capthick:上下横线长短/粗细,ls:linestyle='None'去掉连接线。 ecolor: errorbar line color 226 | ax0.errorbar(regmean.index, y, yerr=y_std, fmt='', elinewidth=0.5, capsize=2, capthick=0.5, ls='None', 227 | ecolor='black') 228 | ax0.set_title(namenow) 229 | 230 | ax0.set_xticks(regmean.index, minor=True) 231 | ax0.set_xticklabels(seqlistother, color="black", minor=True) # minor=True表示次坐标轴 232 | ax0.set_xticks(regPAM.index) 233 | ax0.set_xticklabels(seqlistPAM, color="red") 234 | 235 | # ax0.set_xticks(regmean.index) 236 | # ax0.set_xticklabels(seqlist) 237 | # ax0.tick_params(labelsize=8) 238 | 239 | if path.exists(ckbam): 240 | filter_CK = filter_dict[groupinfor.loc[idx]['control']] 241 | (mutateinforpdCK, deletelentpdCK) = caldel(samfilename=ckbam, start=start, end=end, genename=genename, filter=filter_CK) 242 | ckname = namenow + ' Control' 243 | #pdfname = os.path.join(output, namenow + '.pdf') 244 | regck = mutateinforpdCK.loc[start:end] 245 | 246 | y_ck = regck.delrate 247 | if strand == '-': 248 | y_ck = y_ck[::-1] 249 | else: 250 | ckname = namenow + ' Contron_Unknown' 251 | regck = regmean 252 | y_ck = regmean - regmean 253 | ax1.bar(regck.index, y_ck, color='grey') 254 | ax1.set_title(ckname) 255 | ax1.set_xticks(regck.index,minor=True) 256 | ax1.set_xticklabels(seqlist,minor=True) 257 | ax1.set_xticks(regPAM.index) 258 | ax1.set_xticklabels(seqlistPAM, color="red") 259 | #ax1.tick_params(labelsize=8) 260 | # plt.show() 261 | 262 | plt.savefig(pdfname) 263 | plt.close(fig) 264 | print("group",namenow, "finished!") 265 | 266 | def DNA_complement(sequence): 267 | sequence = sequence.upper() 268 | sequence = sequence.replace('A', 't') 269 | sequence = sequence.replace('T', 'a') 270 | sequence = sequence.replace('C', 'g') 271 | sequence = sequence.replace('G', 'c') 272 | return sequence.upper() 273 | 274 | 275 | def DNA_reverse(sequence): 276 | sequence = sequence.upper() 277 | return sequence[::-1] -------------------------------------------------------------------------------- /CMlib/output_aln_fa_filter.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pysam 3 | from pyfasta import Fasta 4 | import matplotlib 5 | from scipy import stats 6 | import matplotlib.pyplot as plt 7 | import pandas as pd 8 | import numpy as np 9 | import re 10 | from glob import glob 11 | from CMlib.showprocess import showbarprocess 12 | from PyQt5 import QtWidgets 13 | 14 | def alnfile_filter(infofile,groupinfo, refname, output, bamdir): 15 | """ 16 | :param infofile: a description file of details of each sample, example: sample_infor.txt 17 | :param groupinfo: a description file of details of each group, example: group_infor.txt 18 | :param refname: a fasta format of the sequence in the target region, exaple:Samples_gene.fa 19 | :param output: folder of final result 20 | :param bamdir: folder of temporary files 21 | :return: 22 | """ 23 | fa = Fasta(refname) 24 | info = pd.read_csv(infofile, index_col="Index") 25 | groupinfor = pd.read_csv(groupinfo) 26 | stranddict = dict() 27 | # outiofile = os.path.join(output,'filter_wt_reads_number.txt') 28 | # outio = open(outiofile, 'w') 29 | # print("Sample\tfilter", file=outio) 30 | for idy in groupinfor.index: 31 | stranddict[groupinfor.loc[idy].rep1] = groupinfor.loc[idy].strand 32 | stranddict[groupinfor.loc[idy].rep2] = groupinfor.loc[idy].strand 33 | stranddict[groupinfor.loc[idy].rep3] = groupinfor.loc[idy].strand 34 | stranddict[groupinfor.loc[idy].control] = groupinfor.loc[idy].strand 35 | 36 | for idx in info.index: 37 | 38 | note = info.loc[idx].Note 39 | if note not in stranddict: 40 | error = ' '.join([note, 'is not involved in group table! Please Check!']) 41 | showwarnings("Error", error) 42 | continue 43 | 44 | bamname = os.path.join(bamdir, info.loc[idx].Note + '.bam') 45 | outfile_del = os.path.join(output, info.loc[idx].Note + '_del_aln.fa') 46 | outfile_snp = os.path.join(output, info.loc[idx].Note + '_snp_aln.fa') 47 | alnfile_del = os.path.join(output, info.loc[idx].Note + '_del_aln.txt') 48 | alnfile_snp = os.path.join(output, info.loc[idx].Note + '_snp_aln.txt') 49 | print("output", info.loc[idx].Note) 50 | ################ 51 | tmp = "output " + info.loc[idx].Note 52 | showbarprocess(tmp) 53 | ############### 54 | 55 | outfa_del = open(outfile_del, 'w') 56 | outfa_snp = open(outfile_snp, 'w') 57 | outlan_del = open(alnfile_del, 'w') 58 | outlan_snp = open(alnfile_snp, 'w') 59 | 60 | note = info.loc[idx].Note 61 | strand = stranddict[note] 62 | 63 | if (re.search("gRNA", info.loc[idx].Note)): 64 | if strand == '+': 65 | start = info.loc[idx]['start'] - 10 66 | end = info.loc[idx]['end'] + 10 67 | 68 | else: 69 | start = info.loc[idx]['start'] - 10 70 | end = info.loc[idx]['end'] + 10 71 | 72 | elif (re.search("crRNA", info.loc[idx].Note)): 73 | if strand == '+': 74 | start = info.loc[idx]['start'] - 10 75 | end = info.loc[idx]['end'] + 30 76 | 77 | else: 78 | start = info.loc[idx]['start'] - 30 79 | end = info.loc[idx]['end'] + 10 80 | 81 | # if (re.search("gRNA", info.loc[idx].Note)): 82 | # start = info.loc[idx].start - 10 83 | # end = info.loc[idx].end + 10 84 | # elif (re.search("crRNA", info.loc[idx].Note)): 85 | # start = info.loc[idx].start 86 | # end = info.loc[idx].end + 30 87 | #start = info.loc[idx].start - 10 88 | #end = info.loc[idx].end - 10 89 | gene = info.loc[idx].gene_name 90 | samfile = pysam.AlignmentFile(bamname, "rb") 91 | mtreads = set() 92 | totalcov = 0 93 | covage = 0 94 | 95 | replace = set() 96 | replace_left = set() 97 | replace_final = set() 98 | all_tmp = set() 99 | wt_set = set() 100 | replace_side = set() 101 | wt_side_set = set() 102 | wt_final_set = set() 103 | filter_set = set() 104 | 105 | insert = set() 106 | 107 | deletion = set() 108 | 109 | reads = dict() 110 | 111 | seq = fa[gene][start - 1:end].upper() ##reference sequence 112 | seqlist = list() 113 | for nt in seq: 114 | seqlist.append(nt) 115 | 116 | for pileupcolumn in samfile.pileup(gene, max_depth=50000): 117 | 118 | # print (pileupcolumn.pos, pileupcolumn.n) 119 | 120 | 121 | 122 | totalcov += pileupcolumn.n 123 | # print(pileupcolumn.pos, pileupcolumn.n) 124 | 125 | if end > pileupcolumn.pos >= start-1: 126 | 127 | for pileupread in pileupcolumn.pileups: 128 | # print(pileupcolumn.pos, pileupcolumn.n) 129 | 130 | if pileupread.alignment.query_name not in reads: 131 | # print(pileupread.alignment.query_name) 132 | reads[pileupread.alignment.query_name] = '' 133 | 134 | if not pileupread.is_del and not pileupread.is_refskip: 135 | refbase = fa[gene][pileupcolumn.pos].upper() 136 | querybase = pileupread.alignment.query_sequence[pileupread.query_position] 137 | all_tmp.add(pileupread.alignment.query_name) 138 | if querybase != refbase: 139 | replace.add(pileupread.alignment.query_name) 140 | 141 | reads[pileupread.alignment.query_name] += pileupread.alignment.query_sequence[ 142 | pileupread.query_position] 143 | # print(reads[pileupread.alignment.query_name]) 144 | 145 | # print(pileupread.query_position) 146 | # querybase = pileupread.alignment.query_sequence[pileupread.query_position] 147 | 148 | # # refbase = pileupread.alignment.get_reference_sequence()[pileupread.query_position] 149 | # refbase = fa[gene][pileupcolumn.pos].upper() 150 | # if querybase !=refbase : 151 | # # replace += 1 152 | # mtreads.add(pileupread.alignment.query_name) 153 | # replace.add(pileupread.alignment.query_name) 154 | 155 | # if pileupread.indel > 0: 156 | 157 | # # insert += 1 158 | # mtreads.add(pileupread.alignment.query_name) 159 | # insert.add(pileupread.alignment.query_name) 160 | # print() 161 | 162 | if pileupread.indel < 0: 163 | reads[pileupread.alignment.query_name] += '-' * abs(pileupread.indel) 164 | deletion.add(pileupread.alignment.query_name) 165 | # print(reads[pileupread.alignment.query_name]) 166 | # print(reads) 167 | # # deletion += 1 168 | # mtreads.add(pileupread.alignment.query_name) 169 | # deletion.add(pileupread.alignment.query_name) 170 | 171 | wt_set = all_tmp - replace 172 | for pileupcolumn_filter in samfile.pileup(gene, max_depth=50000): ###两边也无突变 173 | 174 | if start > pileupcolumn_filter.pos >= 0 or pileupcolumn_filter.pos > end: 175 | for pileupread_filter in pileupcolumn_filter.pileups: 176 | # for replace_filter in replace_all: 177 | 178 | # if replace_filter in str(pileupread_filter) : 179 | # replace_side.add(pileupread_filter.alignment.query_name) 180 | 181 | if pileupread_filter.alignment.query_name not in replace_left: 182 | 183 | if not pileupread_filter.is_del and not pileupread_filter.is_refskip: 184 | querybase_filter = pileupread_filter.alignment.query_sequence[pileupread_filter.query_position] 185 | 186 | # refbase = pileupread.alignment.get_reference_sequence()[pileupread_filter.query_position] 187 | 188 | refbase_filter = fa[gene][pileupcolumn_filter.pos].upper() 189 | replace_side.add(pileupread_filter.alignment.query_name) # 两边无突变 190 | if querybase_filter != refbase_filter: 191 | # replace += 1 192 | # mtreads.add(pileupread.alignment.query_name) 193 | 194 | # replace.add(pileupread.alignment.query_name) 195 | replace_left.add(pileupread_filter.alignment.query_name) # 两边无突变,有错配 196 | # break 197 | 198 | 199 | wt_side_set = replace_side - replace_left 200 | wt_final_set = wt_side_set & wt_set 201 | filter_set = wt_set - wt_side_set 202 | replace_final = replace - deletion 203 | 204 | 205 | lt = end - start + 1 206 | # print(lt) 207 | typdict = dict() 208 | typdict_snp = dict() 209 | typdict_del = dict() 210 | for i in reads: 211 | if i in filter_set: 212 | continue 213 | if len(reads[i]) == lt: 214 | # print(reads[i]) 215 | if i in replace_final: 216 | if reads[i] in typdict_snp: 217 | typdict_snp[reads[i]] += 1 218 | else: 219 | typdict_snp[reads[i]] = 1 220 | continue 221 | if i in deletion: 222 | if reads[i] in typdict_del: 223 | typdict_del[reads[i]] += 1 224 | else: 225 | typdict_del[reads[i]] = 1 226 | continue 227 | 228 | if reads[i] in typdict: 229 | typdict[reads[i]] += 1 230 | else: 231 | typdict[reads[i]] = 1 232 | for mutype in typdict: 233 | print('>', typdict[mutype], sep='', file=outfa_snp) 234 | print(mutype, file=outfa_snp) 235 | print(typdict[mutype], '\t'.join(mutype), sep='\t', file=outlan_snp) 236 | print('>', typdict[mutype], sep='', file=outfa_del) 237 | print(mutype, file=outfa_del) 238 | print(typdict[mutype], '\t'.join(mutype), sep='\t', file=outlan_del) 239 | for mutype_snp in typdict_snp: 240 | print('>', typdict_snp[mutype_snp], sep='', file=outfa_snp) 241 | print(mutype_snp, file=outfa_snp) 242 | print(typdict_snp[mutype_snp], '\t'.join(mutype_snp), sep='\t', file=outlan_snp) 243 | for mutype_del in typdict_del: 244 | print('>', typdict_del[mutype_del], sep='', file=outfa_del) 245 | print(mutype_del, file=outfa_del) 246 | print(typdict_del[mutype_del], '\t'.join(mutype_del), sep='\t', file=outlan_del) 247 | print("Refseq",'\t'.join(seqlist), sep='\t',file=outlan_snp) 248 | print("Refseq", '\t'.join(seqlist), sep='\t', file=outlan_del) 249 | 250 | 251 | # print(info.loc[idx].Note, end='\t', file=outio) 252 | # print(len(filter_set), end='\n', file=outio) 253 | outfa_snp.close() 254 | outlan_snp.close() 255 | outfa_del.close() 256 | outlan_del.close() 257 | #outio.close() 258 | 259 | # ############## warning message ######### 260 | def showwarnings(title, message): 261 | wBox = QtWidgets.QMessageBox() 262 | wBox.setIcon(QtWidgets.QMessageBox.Warning) 263 | wBox.setWindowTitle(title) 264 | wBox.setText(message) 265 | wBox.setStandardButtons(QtWidgets.QMessageBox.Ok) 266 | wBox.exec_() 267 | ################################################## -------------------------------------------------------------------------------- /CMlib/Barplot_deletion_filter.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pysam 3 | from pyfasta import Fasta 4 | import matplotlib 5 | from scipy import stats 6 | import re 7 | import matplotlib.pyplot as plt 8 | import pandas as pd 9 | import numpy as np 10 | from os import path 11 | from matplotlib.ticker import MultipleLocator, FormatStrFormatter 12 | 13 | 14 | def deletion_len(samfilename, genename, output): 15 | outfile_del = os.path.join(output, samfilename + '_del_aln.fa') 16 | mut_file = os.path.join(output,'mut_rate.all.txt') 17 | fw = open(outfile_del,'r') 18 | lenth = dict() 19 | for i in range(1,21): 20 | lenth[i] = 0 21 | lenth['>21'] = 0 22 | for line in fw: 23 | a = line.strip() 24 | if a.startswith('>'): 25 | count_tmp = a.split('>')[1] 26 | else: 27 | delete = re.findall(r"--*",a) 28 | if delete: 29 | for i in delete: 30 | size = len(i) 31 | if size > 20: 32 | lenth['>21'] += int(count_tmp) 33 | else: 34 | lenth[len(i)] +=int(count_tmp) 35 | else: 36 | if re.findall(r"\w-\w|^-\w|\w-$",a): 37 | lenth['1'] += int(count_tmp) 38 | data = pd.read_table(mut_file,index_col=0) 39 | deletelentpd=pd.DataFrame.from_dict(lenth, orient='index') 40 | deletion_sum=deletelentpd.iloc[:, 0].sum() ##统计所有deletion 41 | # deletion_sum = data.loc[samfilename].total_read_count 42 | deletelentpd['ratio'] = deletelentpd[0]/deletion_sum 43 | 44 | deletelentpd_p = pd.DataFrame.from_dict(lenth, orient='index') 45 | deletion_sum_p=deletelentpd.iloc[:, 0].sum() ##统计所有deletion 46 | # deletion_sum_p = data.loc[samfilename].total_read_count ##统计所有deletion 47 | deletelentpd_p['ratio'] = deletelentpd_p[0]/deletion_sum_p*100 48 | return(deletelentpd,deletelentpd_p) 49 | 50 | def deletionbarplot(regmean,stdrr,namenow,deletelentpdall,pdfname): 51 | fig, ax = plt.subplots() 52 | x = np.array(range(1, 22)) 53 | ymajorFormatter = FormatStrFormatter('%1.1f') ## 设置坐标轴格式 54 | ax.yaxis.set_major_formatter(ymajorFormatter) 55 | ax.bar(x, regmean, color='red') 56 | # add errorbar, elinewidth:errorbar line with; capsize/capthick:上下横线长短/粗细,ls:linestyle='None'去掉连接线。 ecolor: errorbar line color 57 | ax.errorbar(x, regmean, yerr=stdrr, fmt='', elinewidth=0.5, capsize=2, capthick=0.5, ls='None',ecolor='black') 58 | ax.set_title(namenow, size=15, fontdict={'family': 'Times New Roman'}) 59 | ax.set_ylabel('Deletion Size (%)', size=15, fontdict={'family': 'Times New Roman'}) 60 | ax.set_xticks(x) 61 | ax.set_xticklabels(deletelentpdall.index, rotation=35, fontdict={'family': 'Arial'}, size=5) 62 | #ax.legend((bar1[0], bar2[0], bar3[0]), ('rep1', 'rep2', 'control')) 63 | plt.savefig(pdfname) 64 | plt.close(fig) 65 | print("group", namenow, "deletion size finished!") 66 | 67 | def barchart_filter(groupinfo, output, bamdir): 68 | 69 | groupinfor = pd.read_csv(groupinfo) 70 | #print(groupinfor) 71 | #groupinfor = groupinfor.dropna(axis=0, how='any',thresh=7) 72 | groupinfor = groupinfor.fillna("UNKNOWN") ##填充表格中NaN处 73 | #print(groupinfor) 74 | 75 | for idx in groupinfor.index: 76 | 77 | repbam1 = os.path.join(bamdir, groupinfor.loc[idx]['rep1'] + '.bam') 78 | repbam2 = os.path.join(bamdir, groupinfor.loc[idx]['rep2'] + '.bam') 79 | repbam3 = os.path.join(bamdir, groupinfor.loc[idx]['rep3'] + '.bam') 80 | 81 | repdel1 = groupinfor.loc[idx]['rep1'] 82 | repdel2 = groupinfor.loc[idx]['rep2'] 83 | repdel3 = groupinfor.loc[idx]['rep3'] 84 | #ckbam = os.path.join(bamdir, groupinfor.loc[idx]['control'] + '.bam') 85 | genename = groupinfor.loc[idx]['gene'] 86 | namenow = groupinfor.loc[idx]['group'] 87 | pdfname = os.path.join(output, namenow + '_deletion_size.pdf') 88 | csvname = os.path.join(output, namenow + '_deletion_size.csv') 89 | 90 | #if (path.exists(repbam1) and path.exists(repbam2)) and path.exists(ckbam): 91 | if (path.exists(repbam1) and path.exists(repbam2) and path.exists(repbam3)): 92 | (deletelentpd1,deletelentpd1_p) = deletion_len(samfilename=repdel1, genename=genename, output=output) 93 | (deletelentpd2,deletelentpd2_p) = deletion_len(samfilename=repdel2, genename=genename, output=output) 94 | (deletelentpd3, deletelentpd3_p) = deletion_len(samfilename=repdel3, genename=genename, output=output) 95 | deletelentpdall = deletelentpd1 96 | #deletelentpdCK = deletion_len(samfilename=ckbam, genename=genename) 97 | #deletelentpd1 = pd.DataFrame.from_dict(pd1, orient='index') 98 | col1 = deletelentpd1_p.iloc[:, 1] ##提取pandas表格的第三列数值 99 | y1 = col1.values 100 | #deletelentpd2 = pd.DataFrame.from_dict(pd2, orient='index') 101 | col2 = deletelentpd2_p.iloc[:, 1] 102 | y2 = col2.values 103 | col3 = deletelentpd3_p.iloc[:, 1] 104 | y3 = col3.values 105 | #deletelentpdCK = pd.DataFrame.from_dict(pdCK, orient='index') 106 | #colCK = deletelentpdCK.iloc[:, 1] 107 | #yCK = colCK.values 108 | reg = pd.concat([col1, col2, col3], axis=1) 109 | 110 | regmean = reg.mean(axis=1) 111 | stdrr = reg.sem(axis=1) 112 | len_raw_data = pd.concat([deletelentpd1, deletelentpd2, deletelentpd3], axis=1) 113 | len_raw_data['ratio_mean'] = regmean 114 | len_raw_data['ratio_stdrr'] = stdrr 115 | len_raw_data.to_csv(csvname, index=True, index_label='Index', header=['rep1_count','rep1_Ratio','rep2_count','rep2_Ratio','rep3_count','rep3_Ratio','ratio_mean','ratio_stdrr'], sep=',',encoding='utf-8') 116 | print("Output",csvname, "finished!" ) 117 | 118 | ##plot figures 119 | deletionbarplot(regmean, stdrr, namenow, deletelentpdall, pdfname) 120 | 121 | elif (path.exists(repbam1) and path.exists(repbam2)): 122 | print("Rep 3 is missing") 123 | (deletelentpd1,deletelentpd1_p) = deletion_len(samfilename=repdel1, genename=genename, output=output) 124 | (deletelentpd2,deletelentpd2_p) = deletion_len(samfilename=repdel2, genename=genename, output=output) 125 | deletelentpdall = deletelentpd1 126 | #deletelentpdCK = deletion_len(samfilename=ckbam, genename=genename) 127 | #deletelentpd1 = pd.DataFrame.from_dict(pd1, orient='index') 128 | col1 = deletelentpd1_p.iloc[:, 1] ##提取pandas表格的第三列数值 129 | y1 = col1.values 130 | #deletelentpd2 = pd.DataFrame.from_dict(pd2, orient='index') 131 | col2 = deletelentpd2_p.iloc[:, 1] 132 | y2 = col2.values 133 | #deletelentpdCK = pd.DataFrame.from_dict(pdCK, orient='index') 134 | #colCK = deletelentpdCK.iloc[:, 1] 135 | #yCK = colCK.values 136 | reg = pd.concat([col1, col2], axis=1) 137 | 138 | regmean = reg.mean(axis=1) 139 | stdrr = reg.sem(axis=1) 140 | len_raw_data = pd.concat([deletelentpd1, deletelentpd2], axis=1) 141 | len_raw_data['ratio_mean'] = regmean 142 | len_raw_data['ratio_stdrr'] = stdrr 143 | len_raw_data.to_csv(csvname, index=True, index_label='Index', header=['rep1_count','rep1_Ratio','rep2_count','rep2_Ratio','ratio_mean','ratio_stdrr'], sep=',',encoding='utf-8') 144 | print("Output",csvname, "finished!" ) 145 | 146 | ##plot figures 147 | deletionbarplot(regmean, stdrr, namenow, deletelentpdall, pdfname) 148 | 149 | elif (path.exists(repbam1) and path.exists(repbam3)): 150 | print("Rep 2 is missing") 151 | (deletelentpd1,deletelentpd1_p) = deletion_len(samfilename=repdel1, genename=genename, output=output) 152 | (deletelentpd3,deletelentpd3_p) = deletion_len(samfilename=repdel3, genename=genename, output=output) 153 | deletelentpdall = deletelentpd1 154 | #deletelentpdCK = deletion_len(samfilename=ckbam, genename=genename) 155 | #deletelentpd1 = pd.DataFrame.from_dict(pd1, orient='index') 156 | col1 = deletelentpd1_p.iloc[:, 1] ##提取pandas表格的第三列数值 157 | y1 = col1.values 158 | #deletelentpd2 = pd.DataFrame.from_dict(pd2, orient='index') 159 | col3 = deletelentpd3_p.iloc[:, 1] 160 | y2 = col3.values 161 | #deletelentpdCK = pd.DataFrame.from_dict(pdCK, orient='index') 162 | #colCK = deletelentpdCK.iloc[:, 1] 163 | #yCK = colCK.values 164 | reg = pd.concat([col1, col3], axis=1) 165 | 166 | regmean = reg.mean(axis=1) 167 | stdrr = reg.sem(axis=1) 168 | len_raw_data = pd.concat([deletelentpd1, deletelentpd3], axis=1) 169 | len_raw_data['ratio_mean'] = regmean 170 | len_raw_data['ratio_stdrr'] = stdrr 171 | len_raw_data.to_csv(csvname, index=True, index_label='Index', header=['rep1_count','rep1_Ratio','rep3_count','rep3_Ratio','ratio_mean','ratio_stdrr'], sep=',',encoding='utf-8') 172 | print("Output",csvname, "finished!" ) 173 | 174 | ##plot figures 175 | deletionbarplot(regmean, stdrr, namenow, deletelentpdall, pdfname) 176 | 177 | elif (path.exists(repbam2) and path.exists(repbam3)): 178 | print("Rep 1 is missing") 179 | (deletelentpd3,deletelentpd3_p) = deletion_len(samfilename=repdel3, genename=genename, output=output) 180 | (deletelentpd2,deletelentpd2_p) = deletion_len(samfilename=repdel2, genename=genename, output=output) 181 | deletelentpdall = deletelentpd3 182 | #deletelentpdCK = deletion_len(samfilename=ckbam, genename=genename) 183 | #deletelentpd1 = pd.DataFrame.from_dict(pd1, orient='index') 184 | col3 = deletelentpd3_p.iloc[:, 1] ##提取pandas表格的第三列数值 185 | y3 = col3.values 186 | #deletelentpd2 = pd.DataFrame.from_dict(pd2, orient='index') 187 | col2 = deletelentpd2_p.iloc[:, 1] 188 | y2 = col2.values 189 | #deletelentpdCK = pd.DataFrame.from_dict(pdCK, orient='index') 190 | #colCK = deletelentpdCK.iloc[:, 1] 191 | #yCK = colCK.values 192 | reg = pd.concat([col3, col2], axis=1) 193 | 194 | regmean = reg.mean(axis=1) 195 | stdrr = reg.sem(axis=1) 196 | len_raw_data = pd.concat([deletelentpd3, deletelentpd2], axis=1) 197 | len_raw_data['ratio_mean'] = regmean 198 | len_raw_data['ratio_stdrr'] = stdrr 199 | len_raw_data.to_csv(csvname, index=True, index_label='Index', header=['rep2_count','rep2_Ratio','rep3_count','rep3_Ratio','ratio_mean','ratio_stdrr'], sep=',',encoding='utf-8') 200 | print("Output",csvname, "finished!" ) 201 | 202 | ##plot figures 203 | deletionbarplot(regmean, stdrr, namenow, deletelentpdall, pdfname) 204 | 205 | elif path.exists(repbam1): 206 | print("Rep2 and Rep3 are missing") 207 | (deletelentpd1,deletelentpd1_p) = deletion_len(samfilename=repdel1, genename=genename, output=output) 208 | deletelentpdall = deletelentpd1 209 | col1 = deletelentpd1_p.iloc[:, 1] ##提取pandas表格的第三列数值 210 | 211 | reg = pd.concat([col1], axis=1) 212 | 213 | regmean = reg.mean(axis=1) 214 | stdrr = reg.sem(axis=1) 215 | len_raw_data = pd.concat([deletelentpd1], axis=1) 216 | len_raw_data['ratio_mean'] = regmean 217 | len_raw_data['ratio_stdrr'] = stdrr 218 | len_raw_data.to_csv(csvname, index=True, index_label='Index', header=['rep1_count','rep1_Ratio','ratio_mean','ratio_stdrr'], sep=',',encoding='utf-8') 219 | print("Output",csvname, "finished!" ) 220 | 221 | ##plot figures 222 | deletionbarplot(regmean, stdrr, namenow, deletelentpdall, pdfname) 223 | 224 | elif path.exists(repbam2): 225 | print("Rep1 and Rep3 are missing") 226 | (deletelentpd2,deletelentpd2_p) = deletion_len(samfilename=repdel2, genename=genename, output=output) 227 | deletelentpdall = deletelentpd2 228 | col2 = deletelentpd2_p.iloc[:, 1] ##提取pandas表格的第三列数值 229 | 230 | reg = pd.concat([col2], axis=1) 231 | 232 | regmean = reg.mean(axis=1) 233 | stdrr = reg.sem(axis=1) 234 | len_raw_data = pd.concat([deletelentpd2], axis=1) 235 | len_raw_data['ratio_mean'] = regmean 236 | len_raw_data['ratio_stdrr'] = stdrr 237 | len_raw_data.to_csv(csvname, index=True, index_label='Index', header=['rep2_count','rep2_Ratio','ratio_mean','ratio_stdrr'], sep=',',encoding='utf-8') 238 | print("Output",csvname, "finished!" ) 239 | 240 | ##plot figures 241 | deletionbarplot(regmean, stdrr, namenow, deletelentpdall, pdfname) 242 | 243 | elif path.exists(repbam3): 244 | print("Rep1 and Rep2 are missing") 245 | (deletelentpd3,deletelentpd3_p) = deletion_len(samfilename=repdel3, genename=genename, output=output) 246 | deletelentpdall = deletelentpd3 247 | col3 = deletelentpd3_p.iloc[:, 1] ##提取pandas表格的第三列数值 248 | reg = pd.concat([col3], axis=1) 249 | 250 | regmean = reg.mean(axis=1) 251 | stdrr = reg.sem(axis=1) 252 | len_raw_data = pd.concat([deletelentpd3], axis=1) 253 | len_raw_data['ratio_mean'] = regmean 254 | len_raw_data['ratio_stdrr'] = stdrr 255 | len_raw_data.to_csv(csvname, index=True, index_label='Index', header=['rep3_count','rep3_Ratio','ratio_mean','ratio_stdrr'], sep=',',encoding='utf-8') 256 | print("Output",csvname, "finished!" ) 257 | 258 | ##plot figures 259 | deletionbarplot(regmean, stdrr, namenow, deletelentpdall, pdfname) 260 | 261 | 262 | -------------------------------------------------------------------------------- /start.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | 5 | from PyQt5 import uic,QtWidgets 6 | from PyQt5.QtGui import QStandardItemModel, QStandardItem 7 | from PyQt5.QtCore import Qt 8 | from PyQt5.QtWidgets import QTableWidgetItem, QDialog, QHeaderView, QProgressDialog 9 | import os 10 | 11 | from CMlib.show_sampletable import showtable 12 | from CMlib.show_grouptable import showtable as showgrouptable 13 | from CMlib.show_result import showtable as showresult 14 | from CMlib.show_fasta import showfasta 15 | #from crisprmatch_running import showtable as crisprmatchrun 16 | from crisprmatch_running import mainprogram 17 | 18 | from subprocess import Popen 19 | from subprocess import PIPE 20 | # from CRISPRMatch import main as startrunning 21 | 22 | path = os.getcwd() 23 | qtCreatorFile = os.path.join(path,'CMlib/start.ui') # Aquí va el nombre de tu archivo 24 | 25 | Ui_MainWindow, QtBaseClass = uic.loadUiType(qtCreatorFile) 26 | 27 | 28 | 29 | 30 | class MyApp(QtWidgets.QMainWindow, Ui_MainWindow): 31 | def __init__(self): 32 | QtWidgets.QMainWindow.__init__(self) 33 | Ui_MainWindow.__init__(self) 34 | self.setupUi(self) 35 | self.setWindowTitle('CRISPRMatch Start Page') 36 | 37 | # Aquí van los botones 38 | self.load1btn.clicked.connect(self.getsampleCSV) 39 | self.load2btn.clicked.connect(self.getgenefa) 40 | self.load3btn.clicked.connect(self.getgroupCSV) 41 | self.Show1btn.clicked.connect(self.showsampletable) 42 | self.Show2btn.clicked.connect(self.showgenefa) 43 | self.Show3btn.clicked.connect(self.showgrouptable) 44 | 45 | 46 | self.tbnin.clicked.connect(self.inputdir) 47 | self.tbnout.clicked.connect(self.outputdir) 48 | 49 | self.startButton.clicked.connect(self.startrun) 50 | self.resultButton.clicked.connect(self.showresults) 51 | 52 | 53 | 54 | 55 | self.prosstext = str() ###step information list 56 | self.path1 = "" ###Check loading information 57 | self.path2 = "" 58 | self.path3 = "" 59 | self.path1check = "" 60 | self.path2check = "" 61 | self.path3check = "" 62 | self.outputdirpath = "" 63 | self.inputdirpath = "" 64 | self.resultcheck = "" 65 | self.step = "" 66 | 67 | 68 | 69 | 70 | ##############get sampleCSV and show table######### 71 | def getsampleCSV(self): 72 | filePath1, _ = QtWidgets.QFileDialog.getOpenFileName(self, 'Open file', path) 73 | if filePath1 != "": 74 | print("Direction", filePath1) # Opcional imprimir la dirección del archivo 75 | self.dfsample = pd.read_csv(str(filePath1)) 76 | tmp = ' '.join(['load sample information table:',filePath1,';\n']) 77 | self.prosstext +=tmp 78 | self.processinfo.setText(self.prosstext) 79 | self.path1=filePath1 80 | 81 | def showsampletable(self): 82 | if self.path1 !="": 83 | self.ui = showtable() ##打开showtable新窗口 84 | result = self.ui.setuptable(self.dfsample) ##传递倒入sample csv 85 | tmp = ' '.join(['check sample table', ';\n']) 86 | self.prosstext += tmp 87 | self.processinfo.setText(self.prosstext) 88 | if result == "yes": 89 | self.ui.show() ##显示窗 90 | self.path1check = self.path1 91 | else: 92 | self.path1check = "" 93 | else: 94 | self.showMessageBox('Warning', 'Please load Sample information Table first') 95 | self.path1="" 96 | 97 | 98 | 99 | 100 | 101 | # ################################################## 102 | 103 | # ##############get genefa and show fasta######### 104 | def getgenefa(self): 105 | filePath2, _ = QtWidgets.QFileDialog.getOpenFileName(self, 'Open file', path) 106 | if filePath2 != "": 107 | print("Fasta Direction", filePath2) 108 | p = open(filePath2,'r') 109 | self.fafile = p.readlines() 110 | tmp = ' '.join(['load gene file:', filePath2, ';\n']) 111 | self.prosstext += tmp 112 | self.processinfo.setText(self.prosstext) 113 | self.path2 = filePath2 114 | 115 | def showgenefa(self): 116 | if self.path2 != "": 117 | self.ui = showfasta() ##打开showfasta新窗口 118 | result=self.ui.setuptext(self.fafile) ##传递倒入fasta 119 | tmp = ' '.join(['check gene sequence', ';\n']) 120 | self.prosstext += tmp 121 | self.processinfo.setText(self.prosstext) 122 | if result == "yes": 123 | self.ui.show() ##显示窗 124 | self.path2check = self.path2 125 | else: 126 | self.path2check = "" 127 | else: 128 | self.showMessageBox('Warning', 'Please load Gene fasta first') 129 | self.path2="" 130 | # ################################################## 131 | 132 | # ##############get groupCSV and show table######### 133 | def getgroupCSV(self): 134 | filePath3, _ = QtWidgets.QFileDialog.getOpenFileName(self, 'Open file', path) 135 | if filePath3 != "": 136 | print("Direction", filePath3) # Opcional imprimir la dirección del archivo 137 | self.dfgroup = pd.read_csv(str(filePath3)) 138 | 139 | 140 | stripstr = lambda x: x.strip() if isinstance(x, str) else x ##去除前后空格 141 | self.dfgroup = self.dfgroup.applymap(stripstr) 142 | 143 | 144 | tmp = ' '.join(['load gene file:', filePath3, ';\n']) 145 | self.prosstext += tmp 146 | self.processinfo.setText(self.prosstext) 147 | self.path3 = filePath3 148 | 149 | def showgrouptable(self): 150 | if self.path3 != "": 151 | self.ui = showgrouptable() ##打开showtable新窗口 152 | result = self.ui.setuptable(self.dfgroup) ##传递倒入sample csv 153 | tmp = ' '.join(['check group table', ';\n']) 154 | self.prosstext += tmp 155 | self.processinfo.setText(self.prosstext) 156 | ###判断格式 157 | if result == "yes": 158 | self.ui.show() ##显示窗 159 | self.path3check = self.path3 160 | else: 161 | self.path3check = "" 162 | else: 163 | self.showMessageBox('Warning', 'Please load Group information Table first') 164 | self.path3 = "" 165 | ################################################## 166 | 167 | # ##############set input and output file directory######### 168 | def inputdir(self): 169 | inputdirpath = QtWidgets.QFileDialog.getExistingDirectory(self,'open directory',path) 170 | if inputdirpath !="": 171 | print("Direction", inputdirpath) 172 | self.inputdirpath = inputdirpath 173 | 174 | self.lineEdit_input.setText(inputdirpath) 175 | tmp = ' '.join(['input directory:', inputdirpath, ';\n']) 176 | self.prosstext += tmp 177 | self.processinfo.setText(self.prosstext) 178 | 179 | def outputdir(self): 180 | outputdirpath = QtWidgets.QFileDialog.getExistingDirectory(self, 'open directory', path) 181 | if outputdirpath != "": 182 | print("Direction", outputdirpath) 183 | self.outputdirpath = outputdirpath 184 | 185 | self.lineEdit_output.setText(outputdirpath) 186 | tmp = ' '.join(['output directory:', outputdirpath, ';\n']) 187 | self.prosstext += tmp 188 | self.processinfo.setText(self.prosstext) 189 | ################################################## 190 | 191 | # ############## warning message ######### 192 | def showMessageBox(self, title, message): 193 | msgBox = QtWidgets.QMessageBox() 194 | msgBox.setIcon(QtWidgets.QMessageBox.Warning) 195 | msgBox.setWindowTitle(title) 196 | msgBox.setText(message) 197 | msgBox.setStandardButtons(QtWidgets.QMessageBox.Ok) 198 | msgBox.exec_() 199 | ################################################## 200 | 201 | # ############## start running ################### 202 | def startrun(self): 203 | if self.path1 != "" and self.path2 != "" and self.path3 != "": 204 | if self.path1check !="" and self.path2check !="" and self.path3check !="": 205 | sample = self.path1 206 | gene = self.path2 207 | group = self.path3 208 | input = self.inputdirpath 209 | self.output_tmp = self.outputdirpath + '/' + 'tmpfiles' 210 | self.output_result = self.outputdirpath + '/' + 'result' 211 | 212 | #self.ui.startrun(sample, gene, group, input, self.output_tmp, self.output_result) 213 | mainprogram(sample, gene, group, input, self.output_tmp, self.output_result) 214 | 215 | 216 | # x=startrunning(sample,gene,group,input,self.output_tmp,self.output_result) 217 | # tmp = ' '.join([x, ';\n']) 218 | # self.prosstext += tmp 219 | # self.processinfo.setText(self.prosstext) 220 | # self.step="done" 221 | 222 | msgBox = QtWidgets.QMessageBox() 223 | msgBox.setWindowTitle("Information") 224 | msgBox.setIcon(QtWidgets.QMessageBox.Information) 225 | msgBox.setText("Project Done!") 226 | msgBox.setDetailedText("The project has finished, please click ok to show result!") 227 | msgBox.setStandardButtons(QtWidgets.QMessageBox.Open) 228 | 229 | #msgBox.information(self,"Information","Project Done!") 230 | # msgBox.addButton("Show Result",QtWidgets.QMessageBox.ActionRole) 231 | # msgBox.clickedButton() 232 | msgBox.exec_() 233 | self.resultcheck = "done" 234 | self.showresults() 235 | else: 236 | self.showMessageBox('Warning', 'Please click show buttons for information checking') 237 | self.resultcheck = "" 238 | else: 239 | self.showMessageBox('Warning', 'Please load information first') 240 | self.resultcheck = "" 241 | 242 | ################################################## 243 | 244 | # ############## show results ################### 245 | def showresults(self): 246 | if self.path1 != "" and self.resultcheck =="done": 247 | self.ui = showresult() ##打开showtable新窗口 248 | self.ui.setuptable(self.dfsample,self.output_tmp,self.output_result,self.path2,self.dfgroup) ##传递倒入sample csv 249 | tmp = ' '.join(['check sample table', ';\n']) 250 | self.prosstext += tmp 251 | self.processinfo.setText(self.prosstext) 252 | self.ui.show() ##显示窗口 253 | else: 254 | self.showMessageBox('Warning', 'Please load Sample information Table first') 255 | self.path1 = "" 256 | 257 | # crispr = path + '/CRISPRMatch.py' 258 | # cmd = ' '.join(['python',crispr, '-g', gene, '-i', sample, '-gi', group, '-s', output_tmp, '-r', output_result]) 259 | # print(cmd) 260 | # cmd_run = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True) 261 | # cmd_run.communicate() 262 | ################################################## 263 | 264 | # ############## show bar ################### 265 | def showbarprocess(self): 266 | if self.step != "done": 267 | num = int(100000) 268 | progress = QProgressDialog(self) 269 | progress.setWindowTitle("请稍等") 270 | progress.setLabelText("正在操作...") 271 | # progress.setCancelButtonText("取消") 272 | progress.setMinimumDuration(5) 273 | progress.setWindowModality(Qt.WindowModal) 274 | progress.setRange(0, num) 275 | for i in range(num): 276 | progress.setValue(i) 277 | if progress.wasCanceled(): 278 | QtWidgets.QMessageBox.warning(self, "提示", "操作失败") 279 | break 280 | else: 281 | progress.setValue(num) 282 | #QtWidgets.QMessageBox.information(self, "提示", "操作成功") 283 | #self.showbarprocess() 284 | else: 285 | pass 286 | 287 | 288 | 289 | # self.boton2.clicked.connect(self.plot) 290 | # self.boton3.clicked.connect(self.showCSV) 291 | # self.boton4.clicked.connect(self.show_table) 292 | # 293 | # self.tableWidget.setAlternatingRowColors(True) # 隔行改变颜色 294 | # rown=self.tableWidget.rowCount() # 返回表格的行数 295 | # coln=self.tableWidget.columnCount() # 返回表格的列数 296 | # print(rown) 297 | # 298 | # #self.tableWidget.setHorizontalHeaderLabels('abcdef') # 设置表格表头数据 299 | # # self.tableWidget.setColumnCount(5) # 设置表格的列数 300 | # # self.tableWidget.setRowCount(3) # 设置表格的行数 301 | # # self.tableWidget.horizontalHeader().setSectionResizeMode(QtWidgets.QHeaderView.ResizeToContents) # 表格设置成大小随内容改变 302 | # # self.tableWidget.setEditTriggers(QtWidgets.QAbstractItemView.NoEditTriggers) 303 | # self.tableWidget.setItem(3, 3, QTableWidgetItem("insert3,3")) # 设置表格内容为字符串"content" 304 | # self.timeEdit = QtWidgets.QTimeEdit() # 创建一个timeEdit 305 | # self.tableWidget.setCellWidget(0, 0, self.timeEdit) # 把timeedit添加进tableWidget内 306 | # self.spinBox = QtWidgets.QSpinBox() 307 | # self.spinBox.setValue(10) 308 | # self.tableWidget.setCellWidget(2, 1, self.spinBox) 309 | # 310 | # ###set tableView 311 | # self.model = QStandardItemModel(4,4) 312 | # self.model.setHorizontalHeaderLabels(['标题1', '标题2', '标题3', '标题4']) 313 | # #下面代码让表格100填满窗口 314 | # self.tableView.horizontalHeader().setStretchLastSection(True) 315 | # self.tableView.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch) 316 | # for row in range(4): 317 | # for column in range(4): 318 | # item = QStandardItem("row %s, column %s" % (row, column)) 319 | # self.model.setItem(row, column, item) 320 | # self.tableView.setModel(self.model) 321 | # self.model.appendRow([ 322 | # QStandardItem("row %s, column %s" % (11, 11)), 323 | # QStandardItem("row %s, column %s" % (11, 11)), 324 | # QStandardItem("row %s, column %s" % (11, 11)), 325 | # QStandardItem("row %s, column %s" % (11, 11)), 326 | # ]) 327 | # # 取当前选中的所有行 328 | # index = self.tableView.currentIndex() 329 | # print(index.row()) 330 | # self.model.removeRow(index.row()) 331 | # ########### 332 | # 333 | # 334 | # 335 | # 336 | # # Aquí van las nuevas funciones 337 | # # Esta función abre el archivo CSV 338 | # def getCSV(self): 339 | # filePath, _ = QtWidgets.QFileDialog.getOpenFileName(self, 'Open file', '/Users/qyou/GitLab/pyQttest') 340 | # if filePath != "": 341 | # print("Dirección", filePath) # Opcional imprimir la dirección del archivo 342 | # self.df = pd.read_csv(str(filePath)) 343 | # 344 | # def plot(self): 345 | # x=self.df['col1'] 346 | # y=self.df['col2'] 347 | # plt.plot(x,y) 348 | # plt.show() 349 | # estad_st="Estadisticas de col2: " +str(self.df['col2'].describe()) 350 | # self.resultado.setText(estad_st) 351 | # 352 | # def showCSV(self): 353 | # ###set tableView 354 | # rown=len(self.df.index) 355 | # coln=len(self.df.columns) 356 | # self.model = QStandardItemModel(rown,coln) 357 | # labels = list(self.df.columns.values) 358 | # #labels=['Index','Sample','Vector','Note','gRNA_PAM','start','end','Type','gene_name'] 359 | # self.model.setHorizontalHeaderLabels(labels) 360 | # #下面代码让表格100填满窗口 361 | # # self.tableView_csv.horizontalHeader().setStretchLastSection(True) 362 | # # self.tableView_csv.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch) 363 | # 364 | # for row in range(rown): 365 | # #print(self.df.loc[row].Sample) 366 | # for column in range(len(labels)): 367 | # item = QStandardItem(str(self.df.loc[row][labels[column]])) 368 | # self.model.setItem(row, column, item) 369 | # self.tableView_csv.setModel(self.model) 370 | # estad_st=str(self.df) 371 | # self.resultado.setText(estad_st) 372 | # def show_table(self): 373 | # #self.showtableWindow = QtWidgets.QDialog() 374 | # self.ui = showtable() ##打开showtable新窗口 375 | # self.ui.setuptable(self.df) ##传递倒入sample csv 376 | # self.ui.show() ##显示窗口 377 | 378 | 379 | 380 | 381 | 382 | if __name__ == "__main__": 383 | app = QtWidgets.QApplication(sys.argv) 384 | window = MyApp() 385 | window.show() 386 | sys.exit(app.exec_()) -------------------------------------------------------------------------------- /crisprmatch_running.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | 4 | from CMlib import bwa 5 | import os 6 | import os.path 7 | from CMlib import bwa_run 8 | from CMlib import mut_rate_filter 9 | from CMlib import output_aln_fa_filter 10 | from CMlib import plot_each_bam_filter 11 | from CMlib import Barplot_deletion_filter 12 | from subprocess import Popen 13 | from subprocess import PIPE 14 | import re 15 | 16 | from PyQt5 import uic,QtWidgets 17 | from PyQt5.QtGui import QStandardItemModel, QStandardItem 18 | from PyQt5.QtWidgets import QHeaderView, QPushButton,QProgressDialog 19 | from PyQt5.QtCore import Qt 20 | 21 | 22 | # pathdir = os.getcwd() 23 | # qtCreatorFile = os.path.join(pathdir,'CMlib/processing.ui') 24 | # 25 | # Ui_showtable, QtBaseClass = uic.loadUiType(qtCreatorFile) 26 | 27 | # class showtable(QtWidgets.QDialog, Ui_showtable): 28 | # def __init__(self): 29 | # QtWidgets.QDialog.__init__(self) 30 | # Ui_showtable.__init__(self) 31 | # self.setupUi(self) 32 | # 33 | # self.setWindowTitle('Project Running') 34 | # self.prosstext = str() 35 | # #self.closebtn.clicked.connect(self.sampleEdit) 36 | # print("Start running") 37 | # #self.show() 38 | # 39 | # def getdata(self,content): 40 | # self.prosstext += content 41 | # self.processinfo.setText(self.prosstext) 42 | # 43 | # 44 | # 45 | # def startrun(self,sample,gene,group,inputdir,tmpfile,result): 46 | # mainprogram(sample,gene,group,inputdir,tmpfile,result) 47 | # 48 | # 49 | # if __name__ == "__main__": 50 | # app = QtWidgets.QApplication(sys.argv) 51 | # window = showtable() 52 | # window.show() 53 | # sys.exit(app.exec_()) 54 | 55 | def mainprogram(sample,gene,group,inputdir,tmpfile,result): 56 | """ 57 | :param sample: 58 | :param gene: 59 | :param group: 60 | :param inputdir: 61 | :param tmpfile: 62 | :param result: 63 | :return: 64 | """ 65 | args = check_options(get_options(sample,gene,group,tmpfile,result)) 66 | # ?build bwa index 67 | bwaindexfile = os.path.basename(args.genome) 68 | bwatestindex = os.path.join(args.saved, bwaindexfile+'.sa') 69 | bwaindex = os.path.join(args.saved, bwaindexfile) 70 | bwabuild = True 71 | # if os.path.isfile(bwatestindex): 72 | # 73 | # if not args.docker: 74 | # 75 | # print('find:', bwatestindex) 76 | # 77 | # bwamess = "Found bwa index file " + bwatestindex + ". Do you want rebuild it? Press Y or N to continue:" 78 | # 79 | # print(bwamess) 80 | # 81 | # while True: 82 | # 83 | # char = getch() 84 | # 85 | # if char.lower() in ("y", "n"): 86 | # 87 | # print(char) 88 | # 89 | # if char == 'y': 90 | # 91 | # bwabuild = True 92 | # 93 | # elif char == 'n': 94 | # 95 | # bwabuild = False 96 | # 97 | # break 98 | print("bwabuild:", bwabuild, "threads:", args.threads) 99 | #print("genomesize:", genomesize, "kmer:", kmer, "jfkmerfile:", 100 | # jfkmerfile, "kmerbuild:", kmerbuild, "bwabuild:", bwabuild, "threads:", args.threads) 101 | # ?Build Gene_fasta index 102 | showbarprocess("Start Running...") 103 | if bwabuild: 104 | bwa.bwaindex(args.bwa, args.genome, args.saved) 105 | print("## Step 1:") 106 | print("bwa index build finished ...") 107 | else: 108 | print("Use", bwatestindex) 109 | # self.prosstext += "## Step 1:" 110 | # self.processinfo.setText(self.prosstext) 111 | print("bwa index finshed!!") 112 | # self.prosstext += "bwa index finshed!!" 113 | # self.processinfo.setText(self.prosstext) 114 | # ?run bwa alignment 115 | # self.prosstext += "## Step 2:" 116 | # self.processinfo.setText(self.prosstext) 117 | 118 | showbarprocess("Start mapping...") 119 | print("## Step 2:") 120 | print("loading fastq files...!") 121 | bwa_run.prepare(args.input, args.genome, args.saved, args.bwa, args.samtools, args.picard, inputdir) 122 | print("bwa mem finished!") 123 | 124 | 125 | 126 | # self.prosstext += "bwa mem finished!" 127 | # self.processinfo.setText(self.prosstext) 128 | # end run bwa alignment 129 | # # # ?mutation ration calculation 130 | # # print("## Step 3:") 131 | # # mut_rate.rate_cal(args.input, args.groupinfo, args.genome, args.result, args.saved) 132 | # # print("Mutation calculation finished!") 133 | # # # end mutation 134 | # 135 | # ?mutation ration calculation filter 136 | # self.prosstext += "## Step 3:" 137 | # self.processinfo.setText(self.prosstext) 138 | showbarprocess("Start calculting...") 139 | print("## Step 3 update:") 140 | mut_rate_filter.rate_cal_filter(args.input, args.groupinfo, args.genome, args.result, args.saved) 141 | print("Mutation calculation finished!") 142 | # self.prosstext += "Mutation calculation finished!" 143 | # self.processinfo.setText(self.prosstext) 144 | # # ?mutation result display 145 | # mut_rate_filter.display_filter(args.groupinfo, args.result) 146 | # print("Mutation calculation result have been displayed!") 147 | # # end mutation 148 | # 149 | # # # ?mutation result display 150 | # # mut_rate.display(args.groupinfo, args.result) 151 | # # print("Mutation calculation result have been displayed!") 152 | # # # end display 153 | # 154 | # 155 | # # # ?output aln and fa file 156 | # # print("## Step 4:") 157 | # # output_aln_fa.alnfile(args.input, args.groupinfo, args.genome, args.result, args.saved) 158 | # # print("Alignment files were output!") 159 | # # # end output aln and fa file 160 | # 161 | # ?output aln and fa file 162 | # self.prosstext += "## Step 4:" 163 | # self.processinfo.setText(self.prosstext) 164 | showbarprocess("Start statisitcs...") 165 | print("## Step 4 update:") 166 | output_aln_fa_filter.alnfile_filter(args.input, args.groupinfo, args.genome, args.result, args.saved) 167 | print("Alignment files were output!") 168 | # self.prosstext += "Alignment files were output!" 169 | # self.processinfo.setText(self.prosstext) 170 | # end output aln and fa file 171 | # # ?output aln figure 172 | # print("Starting to plot each alignment...") 173 | # output_aln_fa.alnpdf(args.input, args.result) 174 | # print("Alignment figures were done!") 175 | # # end output aln figure 176 | # 177 | # 178 | # # ?plot each bam 179 | # print("## Step 5:") 180 | # print("Starting to plot each bam...") 181 | # plot_each_bam.barchart(args.input, args.groupinfo,args.genome, args.result, args.saved) 182 | # print("plot each bam finished!") 183 | # # end plot each bam 184 | # ?plot each bam 185 | # self.prosstext += "## Step 5:" 186 | # self.processinfo.setText(self.prosstext) 187 | showbarprocess("Start plotting...") 188 | print("## Step 5 update:") 189 | print("Starting to plot each bam...") 190 | plot_each_bam_filter.barchart_filter(args.input, args.groupinfo,args.genome, args.result, args.saved) 191 | print("plot each bam finished!") 192 | # self.prosstext += "plot each bam finished!" 193 | # self.processinfo.setText(self.prosstext) 194 | # end plot each bam 195 | # ?plot each bam 196 | showbarprocess("Start outputting...") 197 | print("Starting to plot each group deletion size...") 198 | Barplot_deletion_filter.barchart_filter(args.groupinfo, args.result, args.saved) 199 | print("plot each group finished!") 200 | 201 | 202 | # self.prosstext += "plot each group finished!" 203 | # self.processinfo.setText(self.prosstext) 204 | # end plot each bam 205 | # ?plot pdf 206 | # print("## Step 6:") 207 | # print("Starting to plot pdf...") 208 | # plot_pdf.plotpdf(args.groupinfo, args.genome, args.result, args.saved) 209 | # print("plot pdf finished!") 210 | # # end plot pdf 211 | # # ?plot pdf 212 | # print("## Step 6 update:") 213 | # print("Starting to plot pdf...") 214 | # plot_pdf_filter.plotpdf_filter(args.groupinfo, args.genome, args.result, args.saved) 215 | # print("plot pdf finished!") 216 | # # end plot pdf 217 | # 218 | # # ?output aln and fa file 219 | # print("## Step test:") 220 | # output_aln_pdf.alnpdftest(args.input, args.result, args.genome,args.groupinfo) 221 | # print("Alignment files were output!") 222 | # # end output aln and fa file 223 | return "Process Done!" 224 | def check_options(parser): 225 | args = parser.parse_args() 226 | # Start check samtools 227 | if args.samtools: 228 | if not os.path.exists(args.samtools): 229 | print("Can not locate samtools, please input full path of samtools\n") 230 | parser.print_help() 231 | sys.exit(1) 232 | else: 233 | samtoolspath = which('samtools') 234 | if samtoolspath: 235 | samtoolsversion=samtools('samtools') 236 | if samtoolsversion == 'None': 237 | print("Can not locate samtools, please input full path of samtools\n") 238 | parser.print_help() 239 | sys.exit(1) 240 | else: 241 | args.samtools = samtoolspath[0] 242 | else: 243 | print("Can not locate samtools, please input full path of samtools\n") 244 | parser.print_help() 245 | sys.exit(1) 246 | # End check samtools 247 | # Start check picard 248 | if args.picard: 249 | if not os.path.exists(args.picard): 250 | print("Can not locate picard, please input full path of picard\n") 251 | parser.print_help() 252 | sys.exit(1) 253 | else: 254 | picardpath = which('picard') 255 | if picardpath: 256 | picardversion=picard('picard') 257 | if picardversion == 'None': 258 | print("Can not locate picard, please input full path of picard\n") 259 | parser.print_help() 260 | sys.exit(1) 261 | else: 262 | args.picard = picardpath[0] 263 | else: 264 | print("Can not locate picard, please input full path of picard\n") 265 | parser.print_help() 266 | sys.exit(1) 267 | # End check picard 268 | # Start check bwa 269 | if args.bwa: 270 | if not os.path.exists(args.bwa): 271 | print("Can not locate bwa, please input full path of bwa\n") 272 | parser.print_help() 273 | sys.exit(1) 274 | bwaversion = bwa.bwaversion(args.bwa) 275 | if bwaversion == 'None': 276 | print("Can not locate bwa, please input full path of bwa\n") 277 | parser.print_help() 278 | sys.exit(1) 279 | else: 280 | bwapath = which('bwa') 281 | if bwapath: 282 | bwaversion = bwa.bwaversion(bwapath[0]) 283 | if bwaversion == 'None': 284 | print("Can not locate bwa, please input full path of bwa\n") 285 | parser.print_help() 286 | sys.exit(1) 287 | else: 288 | args.bwa = bwapath[0] 289 | else: 290 | print("Can not locate bwa, please input full path of bwa\n") 291 | parser.print_help() 292 | sys.exit(1) 293 | # End check bwa 294 | if not os.path.exists(args.genome): 295 | print("Can not locate genome file, please input genome file.\n") 296 | parser.print_help() 297 | sys.exit(1) 298 | # Start check saved folder 299 | if not os.path.exists(args.saved): 300 | os.mkdir(args.saved) 301 | #End check saved folder 302 | # Start check result folder 303 | if not os.path.exists(args.result): 304 | os.mkdir(args.result) 305 | print("#"*40) 306 | print("bwa version:", args.bwa, bwaversion) 307 | print("samtools version:", args.samtools, samtoolsversion) 308 | print("picard version:", args.picard, picardversion) 309 | #print("jellyfish version:", args.jellyfish, jellyfishversion) 310 | print("genome file:", args.genome) 311 | #print("input file:", args.input) 312 | #print("5\' labeled R primer:", args.primer) 313 | print("tmp output folder:", os.path.realpath(args.saved)) 314 | print("result output folder:", os.path.realpath(args.result)) 315 | print("threads number:", args.threads) 316 | #print("homology:", args.homology) 317 | #print("dtm:", args.dtm) 318 | print("#"*40) 319 | return args 320 | # def check_options(parser): 321 | # 322 | # args = parser.parse_args() 323 | # 324 | # # Start check samtools 325 | # if args.samtools: 326 | # 327 | # if not os.path.exists(args.samtools): 328 | # 329 | # print("Can not locate samtools, please input full path of samtools\n") 330 | # 331 | # parser.print_help() 332 | # 333 | # sys.exit(1) 334 | # 335 | # else: 336 | # 337 | # samtoolspath = which('samtools') 338 | # 339 | # if samtoolspath: 340 | # 341 | # samtoolsversion=samtools('samtools') 342 | # if samtoolsversion == 'None': 343 | # 344 | # print("Can not locate samtools, please input full path of samtools\n") 345 | # 346 | # parser.print_help() 347 | # 348 | # sys.exit(1) 349 | # 350 | # else: 351 | # 352 | # args.samtools = samtoolspath[0] 353 | # 354 | # else: 355 | # 356 | # print("Can not locate samtools, please input full path of samtools\n") 357 | # 358 | # parser.print_help() 359 | # 360 | # sys.exit(1) 361 | # 362 | # # End check samtools 363 | # 364 | # # Start check picard 365 | # if args.picard: 366 | # 367 | # if not os.path.exists(args.picard): 368 | # 369 | # print("Can not locate picard, please input full path of picard\n") 370 | # 371 | # parser.print_help() 372 | # 373 | # sys.exit(1) 374 | # 375 | # else: 376 | # 377 | # picardpath = which('picard') 378 | # 379 | # if picardpath: 380 | # 381 | # picardversion=picard('picard') 382 | # if picardversion == 'None': 383 | # 384 | # print("Can not locate picard, please input full path of picard\n") 385 | # 386 | # parser.print_help() 387 | # 388 | # sys.exit(1) 389 | # 390 | # else: 391 | # 392 | # args.picard = picardpath[0] 393 | # 394 | # else: 395 | # 396 | # print("Can not locate picard, please input full path of picard\n") 397 | # 398 | # parser.print_help() 399 | # 400 | # sys.exit(1) 401 | # 402 | # # End check picard 403 | # 404 | # # Start check bwa 405 | # if args.bwa: 406 | # 407 | # if not os.path.exists(args.bwa): 408 | # 409 | # print("Can not locate bwa, please input full path of bwa\n") 410 | # 411 | # parser.print_help() 412 | # 413 | # sys.exit(1) 414 | # 415 | # bwaversion = bwa.bwaversion(args.bwa) 416 | # 417 | # if bwaversion == 'None': 418 | # 419 | # print("Can not locate bwa, please input full path of bwa\n") 420 | # 421 | # parser.print_help() 422 | # 423 | # sys.exit(1) 424 | # 425 | # else: 426 | # 427 | # bwapath = which('bwa') 428 | # 429 | # if bwapath: 430 | # 431 | # bwaversion = bwa.bwaversion(bwapath[0]) 432 | # 433 | # if bwaversion == 'None': 434 | # 435 | # print("Can not locate bwa, please input full path of bwa\n") 436 | # 437 | # parser.print_help() 438 | # 439 | # sys.exit(1) 440 | # 441 | # else: 442 | # 443 | # args.bwa = bwapath[0] 444 | # 445 | # else: 446 | # 447 | # print("Can not locate bwa, please input full path of bwa\n") 448 | # 449 | # parser.print_help() 450 | # 451 | # sys.exit(1) 452 | # 453 | # # End check bwa 454 | # 455 | # if not os.path.exists(args.genome): 456 | # 457 | # print("Can not locate genome file, please input genome file.\n") 458 | # 459 | # parser.print_help() 460 | # 461 | # sys.exit(1) 462 | # 463 | # # Start check saved folder 464 | # if not os.path.exists(args.saved): 465 | # 466 | # os.mkdir(args.saved) 467 | # 468 | # #End check saved folder 469 | # 470 | # # Start check result folder 471 | # if not os.path.exists(args.result): 472 | # os.mkdir(args.result) 473 | # 474 | # # End check result folder 475 | # 476 | # # # Start check saved folder 477 | # # if os.path.exists(args.saved): 478 | # # 479 | # # if not args.docker: 480 | # # 481 | # # print(args.saved, "exists. Everything in this folder will be remove. Press Y or N to continue: ") 482 | # # 483 | # # while True: 484 | # # 485 | # # char = getch() 486 | # # 487 | # # if char.lower() in ("y", "n"): 488 | # # 489 | # # print(char) 490 | # # 491 | # # if char == 'n': 492 | # # 493 | # # sys.exit(1) 494 | # # 495 | # # break 496 | # # 497 | # # else: 498 | # # 499 | # # os.mkdir(args.saved) 500 | # # End check saved folder 501 | # 502 | # # Print Checked information 503 | # print("#"*40) 504 | # 505 | # print("bwa version:", args.bwa, bwaversion) 506 | # 507 | # print("samtools version:", args.samtools, samtoolsversion) 508 | # 509 | # print("picard version:", args.picard, picardversion) 510 | # 511 | # #print("jellyfish version:", args.jellyfish, jellyfishversion) 512 | # 513 | # print("genome file:", args.genome) 514 | # 515 | # #print("input file:", args.input) 516 | # 517 | # #print("5\' labeled R primer:", args.primer) 518 | # 519 | # print("tmp output folder:", os.path.realpath(args.saved)) 520 | # print("result output folder:", os.path.realpath(args.result)) 521 | # 522 | # print("threads number:", args.threads) 523 | # 524 | # #print("homology:", args.homology) 525 | # 526 | # #print("dtm:", args.dtm) 527 | # 528 | # print("#"*40) 529 | # 530 | # return args 531 | def getch(): 532 | """ 533 | For yes/no choice 534 | """ 535 | import sys, tty, termios 536 | fd = sys.stdin.fileno() 537 | old_settings = termios.tcgetattr(fd) 538 | try: 539 | tty.setraw(sys.stdin.fileno()) 540 | ch = sys.stdin.read(1) 541 | finally: 542 | termios.tcsetattr(fd, termios.TCSADRAIN, old_settings) 543 | return ch 544 | def which(filename): 545 | """docstring for which""" 546 | locations = os.environ.get("PATH").split(os.pathsep) 547 | candidates = [] 548 | for location in locations: 549 | candidate = os.path.join(location, filename) 550 | if os.path.isfile(candidate): 551 | candidates.append(candidate) 552 | return candidates 553 | def samtools(filename): 554 | """ 555 | :param filename: 556 | :return: samtools version 557 | """ 558 | samtoolspath=which(filename) 559 | samtoolscmd = ' '.join([samtoolspath[0], '--version']) 560 | #location= samtoolspath[0] 561 | samtoolsrun = Popen(samtoolscmd, stdout=PIPE, stderr=PIPE, shell=True) 562 | i=samtoolsrun.stdout.readlines()[0] 563 | version = i.decode('utf-8').rstrip('\n') 564 | samtoolsrun.communicate() 565 | return version 566 | def picard(filename): 567 | """ 568 | :param filename: 569 | :return: 570 | """ 571 | picardpath=which(filename) 572 | picardcmd = ' '.join([picardpath[0], 'ViewSam', '-h']) 573 | version = 'None' 574 | picardrun = Popen(picardcmd, stdout=PIPE, stderr=PIPE, shell=True) 575 | #print(picardcmd) 576 | for i in picardrun.stderr.readlines(): 577 | i = i.decode('utf-8').rstrip('\n') 578 | if re.search('Version', i): 579 | (_, version) = i.split(' ') 580 | print(version) 581 | picardrun.communicate() 582 | return version 583 | def get_options(sample,gene,group,tmpfile,result): 584 | parser = argparse.ArgumentParser(description="CRISPRMatch is for location finding", prog='CRISPRMatch') 585 | parser.add_argument('--version', action='version', version='%(prog)s 1.0') 586 | parser.add_argument('-b', '--bwa', dest='bwa', help='bwa path') 587 | parser.add_argument('-sm', '--samtools', dest='samtools', help='samtools path') 588 | parser.add_argument('-pi', '--picard', dest='picard', help='picard path') 589 | parser.add_argument('-g', '--genome', dest='genome', help='fasta format genome file', default=gene) 590 | # parser.add_argument('-g', '--genome', dest='genome', help='fasta format genome file', required=True) 591 | # parser.add_argument('-i', '--input', dest='input', help='sample information input file', required=True) 592 | # parser.add_argument('-gi', '--groupinfo', dest='groupinfo', help='group information input file', required=True) 593 | parser.add_argument('-i', '--input', dest='input', help='sample information input file', default=sample) 594 | parser.add_argument('-gi', '--groupinfo', dest='groupinfo', help='group information input file', default=group) 595 | parser.add_argument('-s', '--save', dest='saved', help='tmp saved folder', default=tmpfile) 596 | parser.add_argument('-r', '--result', dest='result', help='result saved folder', default=result) 597 | # parser.add_argument('-s', '--save', dest='saved', help='tmp saved folder', default='tmpfiles') 598 | # 599 | # parser.add_argument('-r', '--result', dest='result', help='result saved folder', default='result') 600 | parser.add_argument('-t', '--threads', dest='threads', help='threads number or how may cpu you wanna use', 601 | default=1, type=int) 602 | parser.add_argument('--docker', default=False) 603 | # parser.parse_args(['--version']) 604 | # args = parser.parse_args() 605 | return parser 606 | 607 | def showbarprocess(content): 608 | num = int(100000) 609 | progress = QProgressDialog() 610 | progress.setWindowTitle("Processing ...") 611 | progress.setLabelText(content) 612 | progress.setCancelButton(None) ##不显示cancel button 613 | #progress.setCancelButtonText("") 614 | progress.setMinimumDuration(5) 615 | progress.setWindowModality(Qt.WindowModal) 616 | progress.setRange(0, num) 617 | for i in range(num): 618 | progress.setValue(i) 619 | 620 | else: 621 | progress.setValue(num) 622 | 623 | progress.cancel() 624 | 625 | # QtWidgets.QMessageBox.information(self, "提示", "操作成功") 626 | # self.showbarprocess() 627 | 628 | # i __name__ == "__main__": 629 | # 630 | # try: 631 | # 632 | # main() 633 | # 634 | # except KeyboardInterrupt: 635 | # 636 | # sys.stderr.write("User interrupt\n") 637 | # 638 | # sys.exit(0) 639 | -------------------------------------------------------------------------------- /CMlib/mut_rate_filter.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pysam 3 | from pyfasta import Fasta 4 | import matplotlib 5 | from scipy import stats 6 | import matplotlib.pyplot as plt 7 | import pandas as pd 8 | import numpy as np 9 | import re 10 | from glob import glob 11 | from matplotlib.ticker import FormatStrFormatter 12 | from CMlib.showprocess import showbarprocess 13 | from PyQt5 import QtWidgets 14 | 15 | def rate_cal_filter(infofile, groupinfo, refname, output, bamdir): 16 | """ 17 | 18 | :param infofile: a description file of details of each sample, example: sample_infor.txt 19 | :param groupinfo: a description file of details of each group, example: group_infor.txt 20 | :param refname: a fasta format of the sequence in the target region, exaple:Samples_gene.fa 21 | :param output: folder of final result 22 | :param bamdir: folder of temporary files 23 | :return: 24 | """ 25 | 26 | 27 | info=pd.read_csv(infofile,index_col="Index") 28 | groupinfor = pd.read_csv(groupinfo) 29 | #print(groupinfor) 30 | #groupinfor = groupinfor.dropna(axis=0, how='any',thresh=7) ##过滤表哥中没填满的行,thresh=7表示至少7个数不是NA 31 | groupinfor.ix[:,pd.isnull(groupinfor).all()] = "UNKNOWN" 32 | groupinfor=groupinfor.fillna("UNKNOWN") ##填充表格中NaN处 33 | stranddict = dict() 34 | for idy in groupinfor.index: 35 | stranddict[groupinfor.loc[idy].rep1] = groupinfor.loc[idy].strand 36 | stranddict[groupinfor.loc[idy].rep2] = groupinfor.loc[idy].strand 37 | stranddict[groupinfor.loc[idy].rep3] = groupinfor.loc[idy].strand 38 | stranddict[groupinfor.loc[idy].control] = groupinfor.loc[idy].strand 39 | 40 | outputname = os.path.join(output, 'mut_rate.all.txt') 41 | outio = open(outputname, "w") 42 | outiofilter_file = os.path.join(output, 'filter_wt_reads_number.txt') 43 | outiofilter = open(outiofilter_file, 'w') 44 | print("Sample\tfilter", file=outiofilter) 45 | fa = Fasta(refname) 46 | print("start calculation!") 47 | #print("Sample\tmuation\treplace\tinsertion_only\tdeletion_only\tinsert&deletion", file=outio) 48 | print("Sample\tmuation\treplace\tinsertion_only\tdeletion_only\tinsert&deletion\tmuation_count\treplace_count\tinsertion_only_count\tdeletion_only_count\tinsert&deletion_count\ttotal_read_count", file=outio) 49 | 50 | 51 | for idx in info.index: 52 | # print(info.loc[idx].Note, info.loc[idx].gene_name, info.loc[idx].start, info.loc[idx].end) 53 | 54 | note = info.loc[idx].Note 55 | 56 | if note not in stranddict: 57 | error = ' '.join([note, 'is not involved in group table! Please Check!']) 58 | showwarnings("Error", error) 59 | continue 60 | 61 | 62 | bamname = os.path.join(bamdir, info.loc[idx].Note+'.bam') 63 | print("Calculating",bamname) 64 | strand = stranddict[note] 65 | 66 | tmp = ' '.join(['Calculating', info.loc[idx].Note]) 67 | showbarprocess(tmp) 68 | 69 | 70 | if (re.search("gRNA", info.loc[idx].Note)): ##5'端延伸10,3'端延伸10 71 | if strand == '+': 72 | start = info.loc[idx]['start'] - 10 73 | end = info.loc[idx]['end'] + 10 74 | 75 | else: 76 | start = info.loc[idx]['start'] - 10 77 | end = info.loc[idx]['end'] + 10 78 | 79 | elif (re.search("crRNA", info.loc[idx].Note)): ##5'端延伸10,3'端延伸30 80 | if strand == '+': 81 | start = info.loc[idx]['start'] - 10 82 | end = info.loc[idx]['end'] + 30 83 | 84 | else: 85 | start = info.loc[idx]['start'] - 30 86 | end = info.loc[idx]['end'] + 10 87 | 88 | 89 | # if (re.search("gRNA", info.loc[idx].Note)): 90 | # start = info.loc[idx].start - 10 91 | # end = info.loc[idx].end + 10 92 | # # print(info.loc[idx].Note, "orignal-start",info.loc[idx].start, "after:",start, "orignal-end",info.loc[idx].end, "after:",end) 93 | # elif (re.search("crRNA", info.loc[idx].Note)): 94 | # start = info.loc[idx].start 95 | # end = info.loc[idx].end + 30 96 | # print(info.loc[idx].Note, "orignal-start",info.loc[idx].start, "after:",start, "orignal-end",info.loc[idx].end, "after:",end) 97 | # start = info.loc[idx].start 98 | # end = info.loc[idx].end 99 | gene = info.loc[idx].gene_name 100 | samfile = pysam.AlignmentFile(bamname, "rb", check_sq=False) 101 | #print(samfile.count()) 102 | mtreads = set() 103 | totalcov = 0 104 | #covage = samfile.count()/100 105 | covage = set() 106 | 107 | replace = set() 108 | 109 | replace_left = set() 110 | all_tmp = set() 111 | wt_set = set() 112 | replace_side = set() 113 | wt_side_set = set() 114 | wt_final_set = set() 115 | total_read = set() 116 | filter_read = set() 117 | test = set() 118 | 119 | insert = set() 120 | 121 | deletion = set() 122 | 123 | mthreads_all = set() 124 | 125 | insert_deletion = set() 126 | 127 | insert_only = set() 128 | 129 | deletion_only = set() 130 | 131 | for pileupcolumn in samfile.pileup(gene, max_depth=50000): 132 | 133 | # print (pileupcolumn.pos, pileupcolumn.n) pos代表该位点的坐标,n代表它的coverage,pileups代表对应的reads 134 | 135 | totalcov += pileupcolumn.n 136 | 137 | if end >= pileupcolumn.pos >= start: 138 | 139 | 140 | for pileupread in pileupcolumn.pileups: 141 | 142 | covage.add(pileupread.alignment.query_name) 143 | 144 | if not pileupread.is_del and not pileupread.is_refskip: 145 | # print(pileupread.query_position) 146 | querybase = pileupread.alignment.query_sequence[pileupread.query_position] 147 | 148 | # refbase = pileupread.alignment.get_reference_sequence()[pileupread.query_position] 149 | 150 | refbase = fa[gene][pileupcolumn.pos].upper() 151 | all_tmp.add(pileupread.alignment.query_name) 152 | 153 | if querybase != refbase: 154 | # replace += 1 155 | mtreads.add(pileupread.alignment.query_name) 156 | 157 | replace.add(pileupread.alignment.query_name) 158 | 159 | # pileupread.indel: 在当前的pileup位点之后的位置的indel长度。如果下一个位点是insertion,indel>0;如果下一个位点是deletion,indel<0 160 | if pileupread.indel > 0: 161 | # insert += 1 162 | mtreads.add(pileupread.alignment.query_name) 163 | insert.add(pileupread.alignment.query_name) 164 | mthreads_all.add(pileupread.alignment.query_name) 165 | 166 | if pileupread.indel < 0: 167 | # deletion += 1 168 | mtreads.add(pileupread.alignment.query_name) 169 | deletion.add(pileupread.alignment.query_name) 170 | mthreads_all.add(pileupread.alignment.query_name) 171 | 172 | # print(pileupcolumn.pos, pileupcolumn.n, replace, insert, deletion) 173 | wt_set = all_tmp - replace 174 | for pileupcolumn_filter in samfile.pileup(gene, max_depth=50000): 175 | 176 | if start > pileupcolumn_filter.pos >= 0 or pileupcolumn_filter.pos > end: 177 | for pileupread_filter in pileupcolumn_filter.pileups: 178 | # for replace_filter in replace_all: 179 | 180 | # if replace_filter in str(pileupread_filter) : 181 | # replace_side.add(pileupread_filter.alignment.query_name) 182 | test.add(pileupread_filter.alignment.query_name) 183 | if pileupread_filter.alignment.query_name not in replace_left: 184 | 185 | if not pileupread_filter.is_del and not pileupread_filter.is_refskip: 186 | querybase_filter = pileupread_filter.alignment.query_sequence[ 187 | pileupread_filter.query_position] 188 | 189 | # refbase = pileupread.alignment.get_reference_sequence()[pileupread_filter.query_position] 190 | 191 | refbase_filter = fa[gene][pileupcolumn_filter.pos].upper() 192 | replace_side.add(pileupread_filter.alignment.query_name) # 两边无突变 193 | if querybase_filter != refbase_filter: 194 | # replace += 1 195 | # mtreads.add(pileupread.alignment.query_name) 196 | 197 | # replace.add(pileupread.alignment.query_name) 198 | replace_left.add(pileupread_filter.alignment.query_name) # 两边无突变,有错配 199 | # break 200 | 201 | # print(len(test)) 202 | # print(len(replace_side)) 203 | # print(len(replace_left)) 204 | wt_side_set = replace_side - replace_left 205 | wt_final_set = wt_side_set & wt_set 206 | #print(len(wt_set - wt_side_set)) 207 | 208 | insert_deletion = insert & deletion 209 | insert_only = insert - deletion 210 | deletion_only = deletion - insert 211 | #mthreads_all = mtreads - replace 212 | # print(info.loc[idx].Note, end='\t', file=outio) 213 | # print(len(mtreads)/500, end='\t', file=outio) 214 | # print(len(replace)/500, end='\t', file=outio) 215 | # print(len(insert_only)/500, end='\t', file=outio) 216 | # print(len(deletion_only)/500, end='\t', file=outio) 217 | # print(len(insert_deletion)/500, end='\n', file=outio) 218 | 219 | #print(len(covage)) 220 | total_read = mtreads | wt_final_set ##去合集 221 | #total_read = mtreads | wt_set ##去合集 222 | filter_read = covage - total_read 223 | print(info.loc[idx].Note, end='\t', file=outio) 224 | #print(len(mtreads)/len(covage)*100, end='\t', file=outio) 225 | print(len(mthreads_all)/len(total_read) * 100, end='\t', file=outio) 226 | print(len(replace)/len(total_read)*100, end='\t', file=outio) 227 | print(len(insert_only)/len(total_read)*100, end='\t', file=outio) 228 | print(len(deletion_only)/len(total_read)*100, end='\t', file=outio) 229 | print(len(insert_deletion)/len(total_read)*100, end='\t', file=outio) 230 | 231 | print(len(mthreads_all), end='\t', file=outio) 232 | print(len(replace), end='\t', file=outio) 233 | print(len(insert_only), end='\t', file=outio) 234 | print(len(deletion_only), end='\t', file=outio) 235 | print(len(insert_deletion), end='\t', file=outio) 236 | print(len(total_read), end='\n', file=outio) 237 | 238 | print(info.loc[idx].Note, end='\t', file=outiofilter) 239 | print(len(filter_read), end='\n', file=outiofilter) 240 | #print(len(filter_read)) 241 | 242 | # print(info.loc[idx].Note.'\t'.len(mtreads)/500,len(replace)/500,'\t',len(insert_only)/500,'\t',len(deletion_only)/500,'\t',len(insert_deletion)/500, file=outio) 243 | # print(info.loc[idx].Note, len(mtreads)/500, len(replace)/500, len(insert)/500, len(deletion)/500 ) 244 | samfile.close() 245 | outio.close() 246 | 247 | def display_filter(groupinfo, output): 248 | """ 249 | :param groupinfo: a description file of details of each group, example: group_infor.txt 250 | :param output: folder of final result 251 | :return: 252 | """ 253 | 254 | mutfile = os.path.join(output, 'mut_rate.all.txt') 255 | mut_rate = pd.read_table(mutfile, sep='\t') 256 | groupinfor = pd.read_csv(groupinfo) 257 | groupinfor = groupinfor.dropna(axis=0, how='any',thresh=6) ##过滤表哥中没填满的行,thresh=7表示至少7个数不是NA,控制treatment和CK至少有一个 258 | #groupinfor.ix[:,pd.isnull(groupinfor).all()] = "UNKNOWN" 259 | groupinfor=groupinfor.fillna("UNKNOWN") ##填充表格中NaN处 260 | #print(groupinfor) 261 | mut_result = dict() 262 | for idx in mut_rate.index: 263 | mut_result[mut_rate.loc[idx].Sample] = mut_rate.values[idx] ##读入mutation信息 264 | # mut_result['OsPDS-RZ-gRNA1_Rep1'][2] 265 | 266 | ## prepare for display 267 | #replace = list() 268 | #replace_yerr = list() 269 | mutation=list() 270 | mutation_yerr = list() 271 | insertO = list() 272 | insertO_yerr = list() 273 | deletionO = list() 274 | deletionO_yerr = list() 275 | insert_deletion = list() 276 | insert_deletion_yerr = list() 277 | glist = list() 278 | ck_glist = list() 279 | 280 | ck_mutation = list() 281 | ck_insertO = list() 282 | ck_deletionO = list() 283 | ck_insert_deletion = list() 284 | for idy in groupinfor.index: 285 | rep1 = groupinfor.loc[idy].rep1 286 | rep2 = groupinfor.loc[idy].rep2 287 | rep3 = groupinfor.loc[idy].rep3 288 | ck = groupinfor.loc[idy].control 289 | if (mut_result.__contains__(rep1) and mut_result.__contains__(rep2) and mut_result.__contains__(rep3)): 290 | 291 | # replace_mean = np.mean([mut_result[rep1][2], mut_result[rep2][2]]) ##np.mean([1,2,3,4,5]) 292 | # # print(group_mean) 293 | # replace.append(replace_mean) 294 | # replace_std = np.std([mut_result[rep1][2], mut_result[rep2][2]]) ## 标准差 295 | # # print("std", group_var) 296 | # replace_yerr.append(replace_std) 297 | mutation_mean = np.mean([mut_result[rep1][1], mut_result[rep2][1], mut_result[rep3][1]]) ##np.mean([1,2,3,4,5]) 298 | # print(group_mean) 299 | mutation.append(mutation_mean) 300 | mutation_std = np.std([mut_result[rep1][1], mut_result[rep2][1], mut_result[rep3][1]]) ## 标准差 301 | # print("std", group_var) 302 | mutation_yerr.append(mutation_std) 303 | 304 | insertO_mean = np.mean([mut_result[rep1][3], mut_result[rep2][3], mut_result[rep3][3]]) 305 | insertO.append(insertO_mean) 306 | insertO_std = np.std([mut_result[rep1][3], mut_result[rep2][3], mut_result[rep3][3]]) 307 | insertO_yerr.append(insertO_std) 308 | 309 | deletionO_mean = np.mean([mut_result[rep1][4], mut_result[rep2][4], mut_result[rep3][4]]) 310 | deletionO.append(deletionO_mean) 311 | deletionO_std = np.std([mut_result[rep1][4], mut_result[rep2][4], mut_result[rep3][4]]) 312 | deletionO_yerr.append(deletionO_std) 313 | 314 | insert_deletion_mean = np.mean([mut_result[rep1][5], mut_result[rep2][5], mut_result[rep3][5]]) 315 | insert_deletion.append(insert_deletion_mean) 316 | insert_deletion_std = np.std([mut_result[rep1][5], mut_result[rep2][5], mut_result[rep3][5]]) 317 | insert_deletion_yerr.append(insert_deletion_std) 318 | elif mut_result.__contains__(rep1) and mut_result.__contains__(rep2): 319 | print("The group:",groupinfor.loc[idy].group, ": Rep3 is missing.") 320 | mutation.append(np.mean([mut_result[rep1][1],mut_result[rep2][1]])) 321 | mutation_yerr.append(np.std([mut_result[rep1][1],mut_result[rep2][1]])) 322 | insertO.append(np.mean([mut_result[rep1][3],mut_result[rep2][3]])) 323 | insertO_yerr.append(np.std([mut_result[rep1][3],mut_result[rep2][3]])) 324 | deletionO.append(np.mean([mut_result[rep1][4],mut_result[rep2][4]])) 325 | deletionO_yerr.append(np.std([mut_result[rep1][4],mut_result[rep2][4]])) 326 | insert_deletion.append(np.mean([mut_result[rep1][5],mut_result[rep2][5]])) 327 | insert_deletion_yerr.append(np.std([mut_result[rep1][5],mut_result[rep2][5]])) 328 | elif mut_result.__contains__(rep1) and mut_result.__contains__(rep3): 329 | print("The group:",groupinfor.loc[idy].group, ": Rep2 is missing.") 330 | mutation.append(np.mean([mut_result[rep1][1],mut_result[rep3][1]])) 331 | mutation_yerr.append(np.std([mut_result[rep1][1],mut_result[rep3][1]])) 332 | insertO.append(np.mean([mut_result[rep1][3],mut_result[rep3][3]])) 333 | insertO_yerr.append(np.std([mut_result[rep1][3],mut_result[rep3][3]])) 334 | deletionO.append(np.mean([mut_result[rep1][4],mut_result[rep3][4]])) 335 | deletionO_yerr.append(np.std([mut_result[rep1][4],mut_result[rep3][4]])) 336 | insert_deletion.append(np.mean([mut_result[rep1][5],mut_result[rep3][5]])) 337 | insert_deletion_yerr.append(np.std([mut_result[rep1][5],mut_result[rep3][5]])) 338 | elif mut_result.__contains__(rep2) and mut_result.__contains__(rep3): 339 | print("The group:",groupinfor.loc[idy].group, ": Rep1 is missing.") 340 | mutation.append(np.mean([mut_result[rep2][1],mut_result[rep3][1]])) 341 | mutation_yerr.append(np.std([mut_result[rep2][1],mut_result[rep3][1]])) 342 | insertO.append(np.mean([mut_result[rep2][3],mut_result[rep3][3]])) 343 | insertO_yerr.append(np.std([mut_result[rep2][3],mut_result[rep3][3]])) 344 | deletionO.append(np.mean([mut_result[rep2][4],mut_result[rep3][4]])) 345 | deletionO_yerr.append(np.std([mut_result[rep2][4],mut_result[rep3][4]])) 346 | insert_deletion.append(np.mean([mut_result[rep2][5],mut_result[rep3][5]])) 347 | insert_deletion_yerr.append(np.std([mut_result[rep2][5],mut_result[rep3][5]])) 348 | elif mut_result.__contains__(rep1): 349 | print("The group:",groupinfor.loc[idy].group, ": Rep2 and Rep3 are missing.") 350 | mutation.append(mut_result[rep1][1]) 351 | mutation_yerr.append(0) 352 | insertO.append(mut_result[rep1][3]) 353 | insertO_yerr.append(0) 354 | deletionO.append(mut_result[rep1][4]) 355 | deletionO_yerr.append(0) 356 | insert_deletion.append(mut_result[rep1][5]) 357 | insert_deletion_yerr.append(0) 358 | elif mut_result.__contains__(rep2): 359 | print("The group:",groupinfor.loc[idy].group, ": Rep1 and Rep3 are missing.") 360 | mutation.append(mut_result[rep2][1]) 361 | mutation_yerr.append(0) 362 | insertO.append(mut_result[rep2][3]) 363 | insertO_yerr.append(0) 364 | deletionO.append(mut_result[rep2][4]) 365 | deletionO_yerr.append(0) 366 | insert_deletion.append(mut_result[rep2][5]) 367 | insert_deletion_yerr.append(0) 368 | elif mut_result.__contains__(rep3): 369 | print("The group:",groupinfor.loc[idy].group, ": Rep1 and Rep2 are missing.") 370 | mutation.append(mut_result[rep3][1]) 371 | mutation_yerr.append(0) 372 | insertO.append(mut_result[rep3][3]) 373 | insertO_yerr.append(0) 374 | deletionO.append(mut_result[rep3][4]) 375 | deletionO_yerr.append(0) 376 | insert_deletion.append(mut_result[rep3][5]) 377 | insert_deletion_yerr.append(0) 378 | else: 379 | print("All repetitions in group:", groupinfor.loc[idy].group, " is missing.") 380 | mutation.append(0) 381 | mutation_yerr.append(0) 382 | insertO.append(0) 383 | insertO_yerr.append(0) 384 | deletionO.append(0) 385 | deletionO_yerr.append(0) 386 | insert_deletion.append(0) 387 | insert_deletion_yerr.append(0) 388 | 389 | if ck=='UNKNOWN': 390 | print("The group:",groupinfor.loc[idy].group, ": CK is missing.") 391 | ck_mutation.append(0) 392 | ck_insertO.append(0) 393 | ck_deletionO.append(0) 394 | ck_insert_deletion.append(0) 395 | else: 396 | ck_mutation.append(mut_result[ck][1]) 397 | ck_insertO.append(mut_result[ck][3]) 398 | ck_deletionO.append(mut_result[ck][4]) 399 | ck_insert_deletion.append(mut_result[ck][5]) 400 | 401 | 402 | glist.append(groupinfor.loc[idy].group) 403 | ck_glist.append(groupinfor.loc[idy].control) 404 | ## prepare for display 405 | 406 | ## print out pdf 407 | mutfile = os.path.join(output, 'mut_result.pdf') 408 | fig, (ax0, ax1) = plt.subplots(ncols=2, sharey=True) 409 | fig.set_size_inches(20, 9) 410 | width = 0.15 411 | ymajorFormatter = FormatStrFormatter('%1.1f') ## 设置坐标轴格式 412 | ax0.yaxis.set_major_formatter(ymajorFormatter) 413 | bar1 = ax0.bar(groupinfor.index, mutation, width, color="#CC79A7") 414 | #bar1 = ax0.bar(groupinfor.index, replace, width, color='pink', yerr=replace_yerr, elinewidth=0.1, capsize=1.5) 415 | ax0.errorbar(groupinfor.index, mutation, yerr=mutation_yerr, fmt='', elinewidth=0.5, capsize=2, capthick=0.5, ls='None', ecolor='black') 416 | bar2 = ax0.bar(groupinfor.index + width, deletionO, width, color="#D55E00") 417 | #bar2 = ax0.bar(groupinfor.index + width, insertO, width, color='green', yerr=insertO_yerr, linewidth=0.5,capsize=1.5) 418 | ax0.errorbar(groupinfor.index+ width, deletionO, yerr=deletionO_yerr, fmt='', elinewidth=0.5, capsize=2, capthick=0.5, ls='None', ecolor='black') 419 | #bar3 = ax0.bar(groupinfor.index + width * 2, deletionO, width, color='blue', yerr=deletionO_yerr, linewidth=0.5,capsize=1.5) 420 | bar3 = ax0.bar(groupinfor.index + width * 2, insertO, width, color="#0072B2") 421 | ax0.errorbar(groupinfor.index + width * 2, insertO, yerr=insertO_yerr, fmt='', elinewidth=0.5, capsize=2, capthick=0.5,ls='None', ecolor='black') 422 | bar4 = ax0.bar(groupinfor.index + width * 3, insert_deletion, width, color="#009E73") 423 | #bar4 = ax0.bar(groupinfor.index + width * 3, insert_deletion, width, color='orange', yerr=insert_deletion_yerr,linewidth=0.5, capsize=1.5) 424 | ax0.errorbar(groupinfor.index + width * 3, insert_deletion, yerr=insert_deletion_yerr, fmt='', elinewidth=0.5, capsize=2,capthick=0.5,ls='None', ecolor='black') 425 | 426 | # ax.bar(reg.index, reg.delrate, color='blue') 427 | ax0.set_title('Treatment', size=15,fontdict = {'family': 'Times New Roman'}) 428 | ax0.set_ylabel('All Mutation (%)', size=15,fontdict = {'family': 'Times New Roman'}) 429 | ax0.set_xticks(groupinfor.index + 1.5 * width) 430 | #ax0.set_xticklabels(glist, rotation=35, size=6) 431 | ax0.set_xticklabels(glist, rotation=35, fontdict = {'family': 'Arial'}, size = 5) 432 | ax0.legend((bar1[0], bar2[0], bar3[0], bar4[0]), ('mutation_all', 'deletion_only', 'insert_only','insert&&deletion')) 433 | 434 | bar5 = ax1.bar(groupinfor.index, ck_mutation, width, color="#CC79A7") 435 | bar6 = ax1.bar(groupinfor.index + width, ck_deletionO, width, color="#D55E00") 436 | bar7 = ax1.bar(groupinfor.index + width * 2, ck_insertO, width, color="#0072B2") 437 | bar8 = ax1.bar(groupinfor.index + width * 3, ck_insert_deletion, width, color="#009E73") 438 | # ax.bar(reg.index, reg.delrate, color='blue') 439 | ax1.set_title('Control', size=15,fontdict = {'family': 'Times New Roman'}) 440 | ax1.set_ylabel('All Mutation (%)', size=15,fontdict = {'family': 'Times New Roman'}) 441 | ax1.set_xticks(groupinfor.index + 1.5 * width) 442 | #ax1.set_xticklabels(ck_glist, rotation=35, size=6) 443 | ax1.set_xticklabels(ck_glist, rotation=35, fontdict = {'family': 'Arial'}, size = 5) 444 | ax1.legend((bar5[0], bar6[0], bar7[0], bar8[0]), ('mutation_all', 'deletion_only', 'insert_only', 'insert&&deletion')) 445 | # plt.show() 446 | plt.savefig(mutfile, dpi=300, format="pdf") 447 | plt.close(fig) 448 | ## print out pdf 449 | 450 | # ############## warning message ######### 451 | def showwarnings(title, message): 452 | wBox = QtWidgets.QMessageBox() 453 | wBox.setIcon(QtWidgets.QMessageBox.Warning) 454 | wBox.setWindowTitle(title) 455 | wBox.setText(message) 456 | wBox.setStandardButtons(QtWidgets.QMessageBox.Ok) 457 | wBox.exec_() 458 | ################################################## --------------------------------------------------------------------------------