├── sample_test
├── group_info.csv
├── split_sample
│ └── split_barcode.csv
├── sample_infor.csv
└── Samples_gene.fa
├── CMlib
├── subprocesspath.py
├── showprocess.py
├── show_fasta.ui
├── show_sampletable.ui
├── processing.ui
├── show_result.ui
├── split_fastq.py
├── bwa_run.py
├── show_sampletable.py
├── show_grouptable.py
├── show_fasta.py
├── show_barcodestable.py
├── output_aln_pdf.py
├── change_color.py
├── plotfigures.py
├── start.bak.ui
├── split_lanes.ui
├── start.ui
├── bwa.py
├── flash_merge.ui
├── plot_each_bam_filter.py
├── plot_pdf_filter.py
├── output_aln_fa_filter.py
├── Barplot_deletion_filter.py
└── mut_rate_filter.py
├── readme.md
├── merge.py
├── split.py
├── start.py
└── crisprmatch_running.py
/sample_test/group_info.csv:
--------------------------------------------------------------------------------
1 | group,rep1,rep2,rep3,control,gene,strand,start,end
2 | AsCpf1-OsPDS-crRNA01,AsCpf1-OsPDS-TTTG-crRNA01_rep1,AsCpf1-OsPDS-TTTG-crRNA01_rep2,AsCpf1-OsPDS-TTTG-crRNA01_rep3,AsCpf1-OsPDS-TTTG-crRNA01_ck,OsPDS,+,136,162
--------------------------------------------------------------------------------
/CMlib/subprocesspath.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 |
def subprocesspath(path):
    """
    Return *path* as an absolute, single-quoted string for use in a shell command.

    :param path: path (relative or absolute) to a file or directory
    :return: absolute path wrapped in single quotes, for subprocess,
             so white space in the path does not split it into several words
    """
    abspath = os.path.abspath(path)
    # A single quote inside the path would end the quoting early. Close the
    # quoted string, emit an escaped quote and reopen it:  '  ->  '\''
    return "'" + abspath.replace("'", "'\\''") + "'"
14 |
def testfun():
    """Placeholder that performs no work and returns ``None``."""
    return None
--------------------------------------------------------------------------------
/sample_test/split_sample/split_barcode.csv:
--------------------------------------------------------------------------------
1 | Index,Sample,Barcode_L,Barcode_R
1,TX180701,GATCAG,GTCCGC
2,TX180702,TAGCTT,GTCCGC
3,TX180703,TAATCG,ACAGTG
4,TX180704,TACAGC,GCCAAT
5,TX180705,GGCTAC,GTCCGC
6,TX180706,CTTGTA,GTCCGC
7,TX180707,GATCAG,GTGAAA
8,TX180708,TAGCTT,GTGAAA
9,TX180709,TATAAT,CAGATC
10,TX180710,TCATTC,ACTTGA
11,TX180711,GGCTAC,GTGAAA
12,TX180712,CTTGTA,GTGAAA
--------------------------------------------------------------------------------
/sample_test/sample_infor.csv:
--------------------------------------------------------------------------------
1 | Index,Sample,Vector,Note,gRNA_PAM,start,end,Type,gene_name
2 | 1,TX.1,-,AsCpf1-OsPDS-TTTG-crRNA01_ck,tttgGAGTGAAATCTCTTGTCTTAAGG,136,162,CK,OsPDS
3 | 2,TX.2,pYPQ203-AsCpf1-OsPDS-crRNA01,AsCpf1-OsPDS-TTTG-crRNA01_rep1,tttgGAGTGAAATCTCTTGTCTTAAGG,136,162,Rep1,OsPDS
4 | 3,TX.3,pYPQ203-AsCpf1-OsPDS-crRNA01,AsCpf1-OsPDS-TTTG-crRNA01_rep3,tttgGAGTGAAATCTCTTGTCTTAAGG,136,162,Rep3,OsPDS
5 | 4,TX.4,pYPQ203-AsCpf1-OsPDS-crRNA01,AsCpf1-OsPDS-TTTG-crRNA01_rep2,tttgGAGTGAAATCTCTTGTCTTAAGG,136,162,Rep2,OsPDS
--------------------------------------------------------------------------------
/CMlib/showprocess.py:
--------------------------------------------------------------------------------
1 | from PyQt5.QtWidgets import QHeaderView, QPushButton,QProgressDialog
2 | from PyQt5.QtCore import Qt
3 |
def showbarprocess(content):
    """
    Show a window-modal progress dialog and drive its bar from empty to full.

    :param content: text displayed as the dialog's label
    :return: None
    """
    total_steps = 100000

    dialog = QProgressDialog()
    dialog.setWindowTitle("Please waiting")
    dialog.setLabelText(content)
    dialog.setCancelButton(None)  # do not show a cancel button
    dialog.setMinimumDuration(5)
    dialog.setWindowModality(Qt.WindowModal)
    dialog.setRange(0, total_steps)

    # Step through every value 0..total_steps, ending on the maximum.
    for step in range(total_steps + 1):
        dialog.setValue(step)
--------------------------------------------------------------------------------
/CMlib/show_fasta.ui:
--------------------------------------------------------------------------------
1 |
2 |
3 | Dialog
4 |
5 |
6 |
7 | 0
8 | 0
9 | 867
10 | 353
11 |
12 |
13 |
14 | Dialog
15 |
16 |
17 | -
18 |
19 |
20 | -
21 |
22 |
23 | Close
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------
/CMlib/show_sampletable.ui:
--------------------------------------------------------------------------------
1 |
2 |
3 | Dialog
4 |
5 |
6 |
7 | 0
8 | 0
9 | 866
10 | 355
11 |
12 |
13 |
14 | Dialog
15 |
16 |
17 | -
18 |
19 |
20 | -
21 |
22 |
23 | Close
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------
/CMlib/processing.ui:
--------------------------------------------------------------------------------
1 |
2 |
3 | Dialog
4 |
5 |
6 |
7 | 0
8 | 0
9 | 400
10 | 300
11 |
12 |
13 |
14 | Dialog
15 |
16 |
17 |
18 |
19 | 50
20 | 60
21 | 301
22 | 23
23 |
24 |
25 |
26 | 24
27 |
28 |
29 |
30 |
31 |
32 | 50
33 | 30
34 | 101
35 | 16
36 |
37 |
38 |
39 | Processing
40 |
41 |
42 |
43 |
44 |
45 | 50
46 | 90
47 | 101
48 | 16
49 |
50 |
51 |
52 | Details
53 |
54 |
55 |
56 |
57 |
58 | 50
59 | 120
60 | 311
61 | 151
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
--------------------------------------------------------------------------------
/CMlib/show_result.ui:
--------------------------------------------------------------------------------
1 |
2 |
3 | Dialog
4 |
5 |
6 |
7 | 0
8 | 0
9 | 866
10 | 580
11 |
12 |
13 |
14 | Dialog
15 |
16 |
17 | -
18 |
19 |
20 |
21 | Arial
22 | 15
23 | 75
24 | true
25 |
26 |
27 |
28 | 1. Single sample result
29 |
30 |
31 |
32 | -
33 |
34 |
35 | -
36 |
37 |
38 |
39 | Arial
40 | 15
41 | 75
42 | true
43 |
44 |
45 |
46 | 2. Groups result
47 |
48 |
49 |
50 | -
51 |
52 |
53 | -
54 |
55 |
56 | Close
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
--------------------------------------------------------------------------------
/CMlib/split_fastq.py:
--------------------------------------------------------------------------------
1 | import os
2 | from PyQt5.QtWidgets import QHeaderView, QPushButton,QProgressDialog,QProgressBar,QDialog
3 | from PyQt5.QtCore import Qt,QBasicTimer
4 |
def reverse_complement(dna):
    """
    Return the reverse complement of a DNA sequence.

    Only the upper-case bases A, C, G and T are known; any other character
    raises ``KeyError``.

    :param dna: sequence string
    :return: complemented sequence, read from the last base to the first
    """
    pairing = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}
    flipped = []
    for base in reversed(dna):
        flipped.append(pairing[base])
    return ''.join(flipped)
8 |
9 |
10 |
def split_fastq(indexnow, df, fastq, output):
    '''
    Extract the reads of one barcoded sample from a FASTQ file.

    A read is kept when it starts with the left barcode and ends with the
    reverse complement of the right barcode, or (opposite orientation) starts
    with the right barcode and ends with the reverse complement of the left
    barcode.  Kept reads are written, with both barcodes trimmed from the
    sequence and the quality string, to
    ``<output>/<sample>/<sample>.extendedFrags.fastq``.

    :param indexnow: row label in df of the sample to extract
    :param df: barcode csv (table with 'Sample', 'Barcode_L', 'Barcode_R' columns)
    :param fastq: split fastqfile (four lines per record)
    :param output: output directory; a sub-directory named after the sample
                   is created inside it
    :return: None
    '''
    record = df.loc[indexnow]
    sample = record['Sample']
    left_code = record['Barcode_L'].upper()
    right_code_raw = record['Barcode_R'].upper()
    right_code = reverse_complement(right_code_raw)
    left_code_reverse = reverse_complement(left_code)
    left_length = len(left_code)
    right_length = len(right_code)

    dirname = os.path.join(output, sample)
    print(' '.join(['mkdir', dirname]))
    # Create the directory directly instead of through a shell command, so
    # paths with spaces work and an already existing directory is not an error.
    os.makedirs(dirname, exist_ok=True)
    outname = os.path.join(dirname, sample + '.extendedFrags.fastq')

    with open(outname, 'w') as fileq, open(fastq) as seqfile:
        for lineno, line in enumerate(seqfile):
            inf = line.rstrip()
            position = lineno % 4  # 0: name, 1: sequence, 2: '+', 3: quality
            if position == 0:
                name = inf
            elif position == 1:
                seq = inf
            elif position == 3:
                quality = inf
                if seq.startswith(left_code) and seq.endswith(right_code):
                    head, tail = left_length, right_length
                # elif: a read matching both orientations is written only once
                elif seq.startswith(right_code_raw) and seq.endswith(left_code_reverse):
                    head, tail = right_length, left_length
                else:
                    continue
                # Explicit end positions: a "-0" slice end would drop the
                # whole read when a barcode is empty.
                seq1 = seq[head:len(seq) - tail]
                quality1 = quality[head:len(quality) - tail]
                print(name, seq1, '+', quality1, sep='\n', file=fileq)
61 |
--------------------------------------------------------------------------------
/CMlib/bwa_run.py:
--------------------------------------------------------------------------------
1 | import os
2 | from glob import glob
3 | import pandas as pd
4 | from pyfasta import Fasta
5 | from subprocess import Popen
6 | from subprocess import PIPE
7 |
def prepare(infofile, refname, output, bwabin, samtoolsbin, picardbin, inputdir):
    """
    Align each sample's reads with ``bwa mem`` and write a sorted, indexed BAM file.

    For every row of the sample table the file
    ``<inputdir>/<Sample>/<Sample>.extendedFrags.fastq`` is aligned and the
    result is written to ``<output>/<Note>.bam``.

    :param infofile: a description file of details of each sample (CSV with
                     'Index', 'Sample' and 'Note' columns), example: sample_infor.csv
    :param refname: a fasta format of the sequence in the target region,
                    example: Samples_gene.fa; its base name inside output is
                    passed to ``bwa mem`` as the reference
    :param output: folder of temporary files
    :param bwabin: bwa bin path
    :param samtoolsbin: samtools bin path
    :param picardbin: picard bin path (not used by the current commands)
    :param inputdir: folder that holds one sub-folder of reads per sample
    :return: True
    """
    datainfo = pd.read_csv(infofile, index_col="Index")
    outputname = os.path.join(output, 'bwa_run.sh')
    documentdir = os.path.abspath(inputdir)
    genomeindex = os.path.join(output, os.path.basename(refname))

    # The script file is still created (emptied) as before, but the handle is
    # released right away instead of staying open for the life of the process.
    with open(outputname, "w"):
        pass

    # Row iteration replaces DataFrame.ix, which no longer exists in pandas >= 1.0.
    for _, row in datainfo.iterrows():
        sample = str(row['Sample'])
        note = str(row['Note'])
        fqname = os.path.join(documentdir, sample, sample + '.extendedFrags.fastq')
        bamfile = os.path.join(output, note + '.bam')
        bwamemcmd = ' '.join([bwabin, 'mem', genomeindex, fqname, '|',
                              samtoolsbin, 'view', '-bS', '-', '|',
                              samtoolsbin, 'sort', '-', '-o', bamfile])
        samtoolscmd = ' '.join([samtoolsbin, 'index', bamfile])
        print(bwamemcmd)
        print(samtoolscmd)
        for command in (bwamemcmd, samtoolscmd):
            process = Popen(command, stdout=PIPE, stderr=PIPE, shell=True)
            _, errors = process.communicate()
            # Report a failure instead of dropping it; processing continues
            # with the next command as it did before.
            if process.returncode != 0:
                print("command failed with exit code", process.returncode, ":", command)
                print(errors.decode(errors='replace'))

    print("bwa command load!")
    print("bwa mem finished")

    return True
--------------------------------------------------------------------------------
/CMlib/show_sampletable.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import pandas as pd
3 | import matplotlib.pyplot as plt
4 |
5 | from PyQt5 import uic,QtWidgets
6 | from PyQt5.QtGui import QStandardItemModel, QStandardItem
7 | from PyQt5.QtWidgets import QHeaderView
8 | import os
9 |
# Locate the .ui file next to this module instead of under the current working
# directory, so the dialog loads no matter where the program is started from.
path = os.path.dirname(os.path.abspath(__file__))
qtCreatorFile = os.path.join(path, 'show_sampletable.ui')

Ui_showtable, QtBaseClass = uic.loadUiType(qtCreatorFile)
14 |
class showtable(QtWidgets.QDialog, Ui_showtable):
    """Dialog that shows the sample information table in a table view."""

    def __init__(self):
        QtWidgets.QDialog.__init__(self)
        Ui_showtable.__init__(self)
        self.setupUi(self)

        self.setWindowTitle('Sample Information Table')
        # The dialog's button closes the window (see sampleEdit).
        self.checkbtn.clicked.connect(self.sampleEdit)
        print("open sample infor table")

    def setuptable(self, pd):
        """
        Fill the table view with the rows of a sample information table.

        :param pd: the table, used as a pandas DataFrame (.index, .columns,
                   .iloc); the name shadows the pandas module inside this
                   method and is kept unchanged for existing callers
        :return: "yes" when the table was loaded, "wrong" when its columns
                 differ from the expected layout
        """
        self.df = pd
        labels = ['Index', 'Sample', 'Vector', 'Note', 'gRNA_PAM', 'start', 'end', 'Type', 'gene_name']
        rown = len(self.df.index)
        self.model = QStandardItemModel(rown, len(labels))

        # check the table format
        if list(self.df.columns.values) != labels:
            self.showMessageBox("warning", "wrong table!")
            return "wrong"

        self.model.setHorizontalHeaderLabels(labels)
        # let the table fill the whole window
        self.tableView.horizontalHeader().setStretchLastSection(True)
        self.tableView.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)

        for row in range(rown):
            # Positional lookup: the row labels of the table are not
            # guaranteed to be 0..n-1, so label-based .loc could fail here.
            record = self.df.iloc[row]
            for column, label in enumerate(labels):
                self.model.setItem(row, column, QStandardItem(str(record[label])))

        self.tableView.setModel(self.model)

        return "yes"

    def sampleEdit(self):
        """Close the dialog."""
        self.close()

    # ############## warning message #########
    def showMessageBox(self, title, message):
        """
        Show a modal warning box with an OK button.

        :param title: window title of the box
        :param message: text shown in the box
        """
        msgBox = QtWidgets.QMessageBox()
        msgBox.setIcon(QtWidgets.QMessageBox.Warning)
        msgBox.setWindowTitle(title)
        msgBox.setText(message)
        msgBox.setStandardButtons(QtWidgets.QMessageBox.Ok)
        msgBox.exec_()
67 | ##################################################
68 |
69 |
70 |
71 |
if __name__ == "__main__":
    # Stand-alone launch: open the dialog and exit with the event loop's status.
    qt_app = QtWidgets.QApplication(sys.argv)
    dialog = showtable()
    dialog.show()
    sys.exit(qt_app.exec_())
--------------------------------------------------------------------------------
/CMlib/show_grouptable.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import pandas as pd
3 | import matplotlib.pyplot as plt
4 |
5 | from PyQt5 import uic,QtWidgets
6 | from PyQt5.QtGui import QStandardItemModel, QStandardItem
7 | from PyQt5.QtWidgets import QHeaderView
8 | import os
9 |
# Locate the .ui file next to this module instead of under the current working
# directory, so the dialog loads no matter where the program is started from.
# The group table reuses the layout of the sample table dialog.
path = os.path.dirname(os.path.abspath(__file__))
qtCreatorFile = os.path.join(path, 'show_sampletable.ui')

Ui_showtable, QtBaseClass = uic.loadUiType(qtCreatorFile)
14 |
class showtable(QtWidgets.QDialog, Ui_showtable):
    """Dialog that shows the group information table in a table view."""

    def __init__(self):
        QtWidgets.QDialog.__init__(self)
        Ui_showtable.__init__(self)
        self.setupUi(self)

        self.setWindowTitle('Group Information Table')
        # The dialog's button closes the window (see sampleEdit).
        self.checkbtn.clicked.connect(self.sampleEdit)
        print("open group table")

    def setuptable(self, pd):
        """
        Fill the table view with the rows of a group information table.

        Missing cells are displayed as the text "None".

        :param pd: the table, used as a pandas DataFrame (.fillna, .index,
                   .columns, .iloc); the name shadows the pandas module inside
                   this method and is kept unchanged for existing callers
        :return: "yes" when the table was loaded, "wrong" when its columns
                 differ from the expected layout
        """
        self.df = pd
        self.df = self.df.fillna("None")
        labels = ['group', 'rep1', 'rep2', 'rep3', 'control', 'gene', 'strand', 'start', 'end']
        rown = len(self.df.index)
        self.model = QStandardItemModel(rown, len(labels))

        # check the table format
        if list(self.df.columns.values) != labels:
            self.showMessageBox("warning", "wrong table!")
            return "wrong"

        self.model.setHorizontalHeaderLabels(labels)
        # let the table fill the whole window
        self.tableView.horizontalHeader().setStretchLastSection(True)
        self.tableView.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)

        for row in range(rown):
            # Positional lookup: the row labels of the table are not
            # guaranteed to be 0..n-1, so label-based .loc could fail here.
            record = self.df.iloc[row]
            for column, label in enumerate(labels):
                self.model.setItem(row, column, QStandardItem(str(record[label])))

        self.tableView.setModel(self.model)
        return "yes"

    def sampleEdit(self):
        """Close the dialog."""
        self.close()

    # ############## warning message #########
    def showMessageBox(self, title, message):
        """
        Show a modal warning box with an OK button.

        :param title: window title of the box
        :param message: text shown in the box
        """
        msgBox = QtWidgets.QMessageBox()
        msgBox.setIcon(QtWidgets.QMessageBox.Warning)
        msgBox.setWindowTitle(title)
        msgBox.setText(message)
        msgBox.setStandardButtons(QtWidgets.QMessageBox.Ok)
        msgBox.exec_()
68 | ##################################################
69 |
70 |
71 |
72 |
if __name__ == "__main__":
    # Stand-alone launch: open the dialog and exit with the event loop's status.
    qt_app = QtWidgets.QApplication(sys.argv)
    dialog = showtable()
    dialog.show()
    sys.exit(qt_app.exec_())
--------------------------------------------------------------------------------
/CMlib/show_fasta.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | from PyQt5 import uic,QtWidgets
4 | from PyQt5.QtGui import QStandardItemModel, QStandardItem
5 | from PyQt5.QtWidgets import QHeaderView
6 | import os
7 |
# Locate the .ui file next to this module instead of under the current working
# directory, so the dialog loads no matter where the program is started from.
path = os.path.dirname(os.path.abspath(__file__))
qtCreatorFile = os.path.join(path, 'show_fasta.ui')

Ui_showfasta, QtBaseClass = uic.loadUiType(qtCreatorFile)
12 |
class showfasta(QtWidgets.QDialog, Ui_showfasta):
    """Dialog that shows the content of a gene FASTA file as text."""

    def __init__(self):
        QtWidgets.QDialog.__init__(self)
        Ui_showfasta.__init__(self)
        self.setupUi(self)

        self.setWindowTitle('Gene Sequence')
        # The dialog's button closes the window (see sampleEdit).
        self.checkbtn.clicked.connect(self.sampleEdit)
        print("open gene fasta")

    def setuptext(self, fasta):
        """
        Display FASTA content in the text box.

        The input is accepted when its first item contains a '>' character.

        :param fasta: indexable sequence of text pieces (presumably the lines
                      of the file - confirm against the caller)
        :return: "yes" when the text was displayed, "wrong" when the input is
                 empty or its first item has no '>'
        """
        self.fasta = fasta
        # Empty input has no header to inspect; report it as an invalid file
        # instead of failing on fasta[0].
        if len(self.fasta) == 0 or '>' not in self.fasta[0]:
            self.showMessageBox("warning", "This is not a Fasta file")
            return "wrong"

        self.string = ''.join(str(piece) for piece in self.fasta)
        self.textEdit.setText(self.string)
        return "yes"

    def sampleEdit(self):
        """Close the dialog."""
        self.close()

    # ############## warning message #########
    def showMessageBox(self, title, message):
        """
        Show a modal warning box with an OK button.

        :param title: window title of the box
        :param message: text shown in the box
        """
        msgBox = QtWidgets.QMessageBox()
        msgBox.setIcon(QtWidgets.QMessageBox.Warning)
        msgBox.setWindowTitle(title)
        msgBox.setText(message)
        msgBox.setStandardButtons(QtWidgets.QMessageBox.Ok)
        msgBox.exec_()
66 | ##################################################
67 |
68 |
69 |
if __name__ == "__main__":
    # Stand-alone launch: open the dialog and exit with the event loop's status.
    qt_app = QtWidgets.QApplication(sys.argv)
    dialog = showfasta()
    dialog.show()
    sys.exit(qt_app.exec_())
--------------------------------------------------------------------------------
/CMlib/show_barcodestable.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import pandas as pd
3 | import matplotlib.pyplot as plt
4 |
5 | from PyQt5 import uic,QtWidgets
6 | from PyQt5.QtGui import QStandardItemModel, QStandardItem
7 | from PyQt5.QtWidgets import QHeaderView
8 | import os
9 |
# Locate the .ui file next to this module instead of under the current working
# directory, so the dialog loads no matter where the program is started from.
# The barcode table reuses the layout of the sample table dialog.
path = os.path.dirname(os.path.abspath(__file__))
qtCreatorFile = os.path.join(path, 'show_sampletable.ui')

Ui_showtable, QtBaseClass = uic.loadUiType(qtCreatorFile)
14 |
class showtable(QtWidgets.QDialog, Ui_showtable):
    """Dialog that shows the barcode table and stores the cell texts back on confirm."""

    def __init__(self):
        QtWidgets.QDialog.__init__(self)
        Ui_showtable.__init__(self)
        self.setupUi(self)

        self.setWindowTitle('Barcode Information Table')
        self.checkbtn.setText("Confirm")
        self.checkbtn.clicked.connect(self.sampleEdit)
        print("open barcode infor table")
        # Both stay "" until the Confirm button has been pressed.
        self.edit = ""
        self.newtable = ""

    def setuptable(self, pd):
        """
        Fill the table view with the rows of a barcode table.

        :param pd: the table, used as a pandas DataFrame (.index, .columns,
                   .iloc); the name shadows the pandas module inside this
                   method and is kept unchanged for existing callers
        :return: "yes" when the table was loaded, "wrong" when its columns
                 differ from the expected layout
        """
        self.df = pd
        labels = ['Index', 'Sample', 'Barcode_L', 'Barcode_R']
        rown = len(self.df.index)
        self.model = QStandardItemModel(rown, len(labels))

        # check the table format
        if list(self.df.columns.values) != labels:
            self.showMessageBox("warning", "wrong table!")
            return "wrong"

        self.model.setHorizontalHeaderLabels(labels)
        # let the table fill the whole window
        self.tableView.horizontalHeader().setStretchLastSection(True)
        self.tableView.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)

        for row in range(rown):
            # Positional lookup: the row labels of the table are not
            # guaranteed to be 0..n-1, so label-based .loc could fail here.
            record = self.df.iloc[row]
            for column, label in enumerate(labels):
                self.model.setItem(row, column, QStandardItem(str(record[label])))

        self.tableView.setModel(self.model)

        return "yes"

    def sampleEdit(self):
        """Copy every cell text of the view model back into the table and mark it as edited."""
        rown = len(self.df.index)
        coln = len(self.df.columns)
        # NOTE: newtable is the same object as self.df, so the table passed to
        # setuptable is updated in place.
        self.newtable = self.df
        for row in range(rown):
            for column in range(coln):
                self.newtable.iloc[row, column] = self.model.item(row, column).text()

        # Success is reported with the information box, not the warning box.
        self.showfinishBox("Success", "The barcode table has been edited!")

        self.edit = "yes"

    def resulttest(self):
        """
        :return: tuple (edit, newtable); edit is "yes" once Confirm was
                 pressed, otherwise both values are ""
        """
        return self.edit, self.newtable

    # ############## warning message #########
    def showMessageBox(self, title, message):
        """
        Show a modal warning box with an OK button.

        :param title: window title of the box
        :param message: text shown in the box
        """
        msgBox = QtWidgets.QMessageBox()
        msgBox.setIcon(QtWidgets.QMessageBox.Warning)
        msgBox.setWindowTitle(title)
        msgBox.setText(message)
        msgBox.setStandardButtons(QtWidgets.QMessageBox.Ok)
        msgBox.exec_()

    ##################################################
    def showfinishBox(self, title, message):
        """
        Show a modal information box.

        :param title: window title of the box
        :param message: text shown in the box
        """
        msgBox = QtWidgets.QMessageBox()
        msgBox.setWindowTitle(title)
        msgBox.setIcon(QtWidgets.QMessageBox.Information)
        msgBox.setText(message)
        msgBox.exec_()
96 |
97 | ################################################
98 |
99 |
100 |
if __name__ == "__main__":
    # Stand-alone launch: open the dialog and exit with the event loop's status.
    qt_app = QtWidgets.QApplication(sys.argv)
    dialog = showtable()
    dialog.show()
    sys.exit(qt_app.exec_())
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # CRISPRMatchGUI
2 | ## CRISPRMatch is no longer actively maintained.
3 | ## We have released new CRISPR data analysis software: [CrisprStitch](https://zhangtaolab.org/software/crisprstitch).
4 | ## Brief introduction
5 | The Graphical User Interface (GUI) for CRISPRMatch, an automatic calculation and visualization tool for high-throughput CRISPR genome-editing data analysis
6 | ## I. Requirements(软件所需依赖包)
7 | Anaconda
8 | python3
9 | bwa
10 | samtools
11 | FLASH
12 | pyqt5
13 |
14 | [**Note:**] Use `Anaconda` to install all packages (`bwa,samtools,picard,FLASH`) ##应用conda统一安装即可,方便快捷
15 |
16 | ## II. Manually Install(手动安装,非虚拟机)
17 | CentOS Linux release 7.3.1611 (terminal)
18 | 1. Install Anaconda
19 | ```
20 | $ yum install wget git ##安装git和wget程序
21 | $ mkdir /home/software ##创建下载软件的文件夹,这里以software为例
22 | $ cd /home/software ##进入到software文件夹下
23 | $ wget https://repo.continuum.io/archive/Anaconda3-5.0.1-Linux-x86_64.sh ##下载linux对应的conda版本,注意尽量选择3.5版本的!
24 | $ bash Anaconda3-5.0.1-Linux-x86_64.sh ##用bash命令安装conda
25 | ```
26 | 2. Install required packages ##利用conda安装所有依赖包(建议用清华软件源镜像替换,缩短安装时间)
27 | ```
28 | $ conda install bwa \ ##用\符号和回车分隔多个软件
29 | samtools \
30 | pyqt=5.6 \
31 | flash \
32 | matplotlib \
33 | pysam \
34 | pandas \
35 | argparse \
36 | numpy
37 | ```
38 | **Note:** To ensure the tool works, please use `Anaconda` to install all packages (`bwa,samtools,pyqt,FLASH ...`)
39 |
40 | 3. Download CRISPRMatchGUI and test ##下载本软件的软件包
41 | ```
42 | $ cd /home/software ##进入到software文件夹下
43 | $ git clone https://github.com/zhangtaolab/CRISPRMatchGUI.git ##利用git方式下载本软件包
44 | $ cd /home/software/CRISPRMatchGUI/ ##进入本软件文件夹
45 | $ python3 /home/software/CRISPRMatchGUI/start.py ##使用python3打开软件包中的start.py程序,即可实现软件运行
46 |
47 | ```
48 | ## III. Start running(运行方法)
49 | 1. Video manual(用户手册)
50 |
51 | >(1)CRISPRMatch虚拟机使用教程
52 | - Link: https://v.youku.com/v_show/id_XMzgwODc4ODQ2NA==.html?spm=a2h3j.8428770.3416059.1
53 |
54 | >(2)双端测序数据合并教程
55 | - Link: https://v.youku.com/v_show/id_XMzkzMTY5NTEwOA==.html?scm=20140719.manual.114461.video_XMzkzMTY5NTEwOA==
56 |
57 | >(3)拆分混池测序结果(带有barcode信息)
58 | - Link: https://v.youku.com/v_show/id_XMzkzMTY5MzY4NA==.html?scm=20140719.manual.114461.video_XMzkzMTY5MzY4NA==
59 |
60 | >(4)虚拟机读取usb设备(该方法可实现大数据集计算)
61 | - Link: https://v.youku.com/v_show/id_XMzk0MDgyMjA2MA==.html?scm=20140719.manual.114461.video_XMzk0MDgyMjA2MA==
62 |
63 | 2. Mirroring file for Windows (虚拟机下载地址)
64 | - Link: https://pan.baidu.com/s/1L8KPij9SP2Mp9v7RYgS5_w code: CPF1
65 |
66 | 3. Files for mutation calculation(编辑计算所需三个信息文件)
67 | - **File1**: Genome-editing target sequences
68 | [Fasta format example](https://github.com/zhangtaolab/CRISPRMatchGUI/tree/master/sample_test/Samples_gene.fa)
69 | - **File2**: NGS samples information
70 | *note*:
71 | For CRISPR-Cas9 system, the `'Note'` must contain `'gRNA'` label.
72 | For CRISPR-Cpf1 system, the `'Note'` must contain `'crRNA'` label.
73 | *example*:
74 | [sample information](https://github.com/zhangtaolab/CRISPRMatchGUI/tree/master/sample_test/sample_infor.csv)
75 | - **File3**: NGS group information
76 | *note*: At present, two repeats are supported
77 | *example*:
78 | [group information](https://github.com/zhangtaolab/CRISPRMatchGUI/tree/master/sample_test/group_info.csv)
79 | - **Note**: the information files `File1`, `File2` and `File3` are required!
80 |
81 |
82 | 4. Merge paired-end reads(运行双端测序数据合并程序)
83 | ```
84 | $ cd /home/software/CRISPRMatchGUI/ ##进入本软件文件夹
85 | $ python3 /home/software/CRISPRMatchGUI/merge.py ##运行双端测序数据合并
86 | ```
87 | - *example*:
88 | [paired-end reads](https://github.com/zhangtaolab/CRISPRMatchGUI/tree/master/merge_sample/)
89 |
90 |
91 | 5. Split sequencing file(运行拆分混池测序结果)
92 | ```
93 | $ cd /home/software/CRISPRMatchGUI/ ##进入本软件文件夹
94 | $ python3 /home/software/CRISPRMatchGUI/split.py ##运行拆分混池测序程序
95 | ```
96 |
97 |
--------------------------------------------------------------------------------
/sample_test/Samples_gene.fa:
--------------------------------------------------------------------------------
1 | >OsPDS
2 | ATGgatactggctgcctgtcatctaTGAACATAACTGGAACCAGCCAAGCAAGATCTTTTGCGGGACAACTTCCTACTCATAGGTGCTTCGCAAGTAGCAGCATCCAAGCACTGAAAAGTAGTCAGCATGTGAGCTTTGGAGTGAAATCTCTTGTCTTAAGGAATAAAGGAAAAAGATTCCGTCGGAGGCTCGGTGCTCTACAGgttcaacctttgtactctattattgcctcacattccatctcttgtgaaaatatatttgattggcttttctgcagGTTGTTTGCCAGGACTTTCCAAGACCTCCACTAGAAAACACAATAAACTTTTTGGAAGCTGGACAACTATCTTCATTTTTCAGAAACAGTGAACAACCCACTAAACCATTACAGGTCGTGATTGCTGGAGCAGgtatgatataattctaggatttgacagatgaataatttacatatatatctaactttgatagcagtcacatcgtggtcttagcattgtagtttttagctttgatttttttttcagGATTAGCTGGTTTATCAACGGCAAAATATCTGGCAGATGCTGGTCATAAACCCATATTGCTTGAGGCAAGGGATGTTTTGGGTGGAAAGgttttactcttatgcttttatgttgcatttaattttttttgttattcattctttttttttttggttgcctttatcttaatagctcatattcactgttagtagcatttgtggattattgtttttttttttggggaaatgccttgaacagATAGCTGCTTGGAAGGATGAAGATGGAGATTGGTATGAAACTGGGCTTCATATCTTTTgtaagtaataactctggatttttaaggttctcgttgtgctatattttatttaggttattaccgccagcactgatagatatctctaagggttttgaacaaaaaaacatgtatcaaactctttcatcgataaggtagaaatgccatgcgggaagtatgaagtgatgtctgaggattaacacacatggtagttttattttgtaagaaacttttagattggtttttttcacagtactaaaaagtaactttttactagcttatatggttgataaattttaacgtcacataaatatcatgagctaattgaatataaatcctcctgttcatacatagtcttctttcaacctactattcccttccaaacatatatgaatatgacagatactgtttttccttccatgctcacactgttttgtcgtccacaacagtacatatgtgacattgttcattttgtgcctgtatgtaaccatatacctttttggtttaagTTGGAGCTTATCCCAACATACAGAACTTGTTTGGCGAGCTTGGTATTAATGATCGGTTGCAATGGAAGGAACACTCCATGATATTTGCCATGCCAAACAAGCCAGGAGAATTCAGCCGGTTTGATTTTCCTGAAACATTGCCTGCACCCTTAAATGgtgagatcatatgcagcgctggagttgtttaattaaaccaagattcccagaagtacatcgtattggtggttacttttgttttactaacacatgactgtaattagggggtatattactagcaacgttaatgatagatcaatagatcatgccatggagcttttatgttgtcaattgatgcctatttattatttatcattgatcatgcgtgcatttaacagGAATATGGGCCATACTAAGAAACAATGAAATGCTAACTTGGCCAGAGAAGGTGAAGTTTGCTCTTGGACTTTTGCCAGCAATGGTTGGTGGCCAAGCTTATGTTGAAGCTCAAGATGGTTTTACTGTTTCTGAGTGGATGAAAAAGCAGgtataagttcacaatatcagtttgtcaagtctctgtgtacaagacacatttctacctcattaatttggaatggatataggagaaggtgttgtaagctagaaaaccttttattttctaataaaaaaactgatgccctttattgttgcattcacattgggaagaactggc
agttctgaggatgaaatgcttcatgtactcaagtttatgccctttattttgcccagatccttttgcacaggtttaagcttgagctatgcttttagtttaagaccactgtttcagttaaaggtcaacaaccttgcatgatttcttcctccacctagaaaagccattgcacatattgacaaagcacacaatcctgttgactatattctttatgagctaatatacagaactgttttatacagaaaacacaatacatatgctatagttatcaatctctttccctttttttgggataacggattaatatggtgcctgatacagttgtttgatcagcacagGGTGTTCCTGATCGAGTGAACGATGAGGTTTTCATTGCAATGTCAAAGGCACTTAATTTCATAAATCCTGATGAGTTATCCATGCAGTGCATTCTGATTGCTTTAAACCGATTTCTTCAGgtatttattatgttgctctatggtcatgtgtgttgcatatgagtaattcttctgttctttccggagtagtaccttacgtattacatccttcttagtgtttcttgtctctgttgtttcctaccttgaggaaactcaaatgaattttcgcttagaggccttttaaaaaaaattatgcaaatgtgtagGAGAAGCATGGTTCTAAGATGGCATTCTTGGATGGTAATCCTCCTGAAAGGTTATGCATGCCTATTGTTGACCATGTTCGCTCTTTGGGTGGTGAGGTTCGGCTGAATTCTCGTATTCAGAAAATAGAACTTAATCCTGATGGAACAGTGAAACACTTTGCACTTACTGATGGAACTCAAATAACTGGAGATGCTTATGTTTTTGCAACACCAGgtgattttctacaatctttgtttcttctgcagttcataaattatatatatgcggctactcattttaactgactagcctgtatttagTTGATATCTTGAAGCTTCTTGTACCTCAAGAGTGGAAAGAAATATCTTATTTCAAGAAGCTGGAGAAGTTGGTGGGAGTTCCTGTTATAAATGTTCATATATGgttggttggttgaattatttggttccaagtcggaaattactcatcatcgagtttgtggttctccttatgactcatattagtatttctgttggtttgaacatttcagGTTTGATAGAAAACTGAAGAACACATATGACCACCTTCTTTTCAGCAGgtgtctcttctaattcctcatcagttttgctgtcctttcactgcctcatgcatttgctctgtgctatgactggtttatgaactaaaacgatttgtattgcccaaattgggcacattctatcctgattttgtatacattcttgattaataccaaatatcatatgtcccatgtattgatcttgttcccttttctttcagGAGTTCACTTTTAAGTGTTTATGCGGACATGTCAGTAACTTGCAAGgtactaactaggagacattatatgttacgaaatagtaactatctgtcatgtattattgctcttgtgtatttgttcttgggtttaccatcttcaagcatcacatgatatttattttagtagctgtaacaaaaggcccaaaagtgcatgtgttacagaaggaatccagtattaattattaaacttggaaagtagatatattttatttcagattcatttaggcaacatgtcacttggctctagagtctagattttatggaccataatagctcaggaaattaaagacatggatgcctactgaacggttttctttccttttgttttgaactctttacagGAATACTATGATCCAAACCGTTCAATGCTGGAGTTGGTCTTTGCTCCTGCAGAGGAATGGGTTGGACGGAGTGACACTGAAATCATCGAAGCAACTATGCAAGAGCTAGCCAAGCTATTTCCTGATGAAATTGCTGCTGATCAGAGTAAAGCAAAGATTCTGAAGTATCATGTTGTGAAGACACCAAGgtgaggacattttgcaagagcgccccctatctgatata
tcataggtaggtctaatagttggatgcacacttctctcacgttcctttcttttctgtctcactgttacagATCTGTTTACAAGACTATCCCGGACTGTGAACCTTGCCGACCTCTGCAAAGATCACCGATTGAAGGGTTCTATCTAGCTGGTGACTACACAAAGCAGAAATATTTGGCTTCGATGGAGGGTGCAGTTCTATCTGGGAAGCTTTGTGCTCAGTCTGTAGTGGAGgtaaacgctgctctccatggttctgtttgtacatagatgcatcagacttgtattgttgtcttggtgcagttcacaatgattcagttttgtaggctaatgagttatcacttgctgatttcag
3 |
4 | >OsDEP1
5 | ggcataataatctgtactactgccaaactgagcttttacatggtgaaaatattttccctgcagatcaaaattgtgtatctgcatttcatgtctttgctactgttgcaagtgctcacccaagtgcaaaagaccaaggtgcctcaattgttcttgcagctcatgctgcgacgagccatgctgtaagccaaactgcagtgcgtgctgcgctgggtcatgctgtagtccagactgctgctcatgctgtaaacctaactgcagttgctgcaagaccccttc
6 |
7 | >OsROC5
8 | aggtttgggctaatgtcctccacatttcgcaccgtatacttatgttccgttccaatcctataatgtactaatgttggtgttacttgcattcatttcacagagacggtggtctgacatgttctcgtgcatgattgctaaggcaacagtgcttgaggaggtgtctaccggcattgcaggaagcagaaatggcgcgttgctgctggtgagtgctgatcaacagtgctaatgttcatttatcttacatagtgtaagacgtagctaacatattttctttctgaattgttaatttttcttgtgtttgcttgcaacagatgaaggctgagctacaggtg
--------------------------------------------------------------------------------
/CMlib/output_aln_pdf.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pysam
3 | from pyfasta import Fasta
4 | import matplotlib
5 | from scipy import stats
6 | import matplotlib.pyplot as plt
7 | import pandas as pd
8 | import numpy as np
9 | import re
10 | from glob import glob
11 |
def alnpdftest(infofile, output, refname, groupinfo):
    """
    Draw one alignment figure (PDF) for every sample of the sample table.

    For each sample, ``<Note>_del_aln.txt`` and ``<Note>_snp_aln.txt`` found in
    ``output`` are concatenated into ``<Note>_aln.txt``. Every row of that table
    is then drawn as a line of cells (cells equal to the reference base are
    coloured) and the figure is saved as ``<Note>_aln.test.pdf``.

    :param infofile: CSV sample table, read with "Index" as index column; the
                     Note, gene_name, start and end columns are used
    :param output: directory holding the ``*_del_aln.txt`` / ``*_snp_aln.txt``
                   inputs; the merged tables and the PDFs are written there too
    :param refname: reference FASTA file, opened with pyfasta
    :param groupinfo: group table read with ``pandas.read_table``; the rep1,
                      rep2, control and strand columns are used, rep3 when present
    :return: None
    """
    # Background colour of a cell whose base equals the reference base.
    match_colors = {"A": "red", "T": "blue", "G": "green", "C": "orange"}

    info = pd.read_csv(infofile, index_col="Index")
    fa = Fasta(refname)
    groupinfor = pd.read_table(groupinfo)
    # DataFrame.ix no longer exists in pandas; .loc accepts the same boolean column mask.
    groupinfor.loc[:, pd.isnull(groupinfor).all()] = "UNKNOWN"
    groupinfor = groupinfor.fillna("UNKNOWN")  # fill the remaining NaN cells

    # Map every sample name of a group to the strand of that group.
    # rep3 is optional: it is only read when the group table has that column.
    member_columns = ["rep1", "rep2"]
    if "rep3" in groupinfor.columns:
        member_columns.append("rep3")
    member_columns.append("control")
    stranddict = dict()
    for idy in groupinfor.index:
        grouprow = groupinfor.loc[idy]
        for column in member_columns:
            stranddict[grouprow[column]] = grouprow['strand']

    for idx in info.index:
        sample = info.loc[idx]
        note = sample['Note']
        genename = sample['gene_name']

        if note not in stranddict:
            print("error", note, "is not listed in the group table, skipped")
            continue
        strand = stranddict[note]

        # Window of the reference that the alignment columns are compared with.
        if re.search("gRNA", note):
            # Both strands use the same padding for gRNA samples.
            start = sample['start'] - 10
            end = sample['end'] + 10
        elif re.search("crRNA", note):
            if strand == '+':
                start = sample['start']
                end = sample['end'] + 30
            else:
                start = sample['start'] - 30
                end = sample['end']
        else:
            # Without this branch start/end would be undefined, or silently
            # reused from the previous sample.
            print("error", note, "is neither a gRNA nor a crRNA sample, skipped")
            continue

        alnfile = os.path.join(output, note + '_aln.txt')
        outfile = os.path.join(output, note + '_aln.test.pdf')

        delfile = os.path.join(output, note + '_del_aln.txt')
        snpfile = os.path.join(output, note + '_snp_aln.txt')

        # Concatenate the deletion and SNP tables in Python instead of through a
        # `cat` shell string, which broke on paths containing spaces.
        with open(alnfile, 'wb') as merged:
            for part in (delfile, snpfile):
                if os.path.isfile(part):
                    with open(part, 'rb') as piece:
                        merged.write(piece.read())

        if os.path.getsize(alnfile):  # an empty table cannot be parsed or drawn
            print("start output", alnfile, "figure")
        else:
            print("error", alnfile, "figure")
            continue

        data = pd.read_table(alnfile, header=None)
        # Two drawing units per cell plus a margin of five units on each side.
        withset = len(data.columns) * 2 + 10
        heightset = len(data.index) * 2 + 10

        fig, ax = plt.subplots()
        fig.set_size_inches(0.01 * withset, 0.01 * heightset)
        ax.set_title(note, size=2, fontdict={'family': 'sans-serif'})
        ax.set_ylim(0, heightset)
        ax.set_xlim(0, withset)
        ax.set_yticks([])  # no tick marks
        ax.set_xticks([])
        for side in ('left', 'bottom', 'right', 'top'):
            ax.spines[side].set_visible(False)  # hide the frame
        ypos = 5

        seq = fa[genename][start - 1:end].upper()  # reference sequence

        for x in data.index:  # one table row per drawn line
            row = data.loc[x]
            xpos = 5
            # Column 0 is written as the row label, right-aligned before the first cell.
            ax.text(4, ypos + 1, row[0], size=1, horizontalalignment='right', verticalalignment='center', )

            for n in range(1, len(row)):
                base = row[n]
                # Rows longer than the reference window are drawn uncoloured
                # instead of raising IndexError.
                if n - 1 < len(seq) and base == seq[n - 1]:
                    color = match_colors.get(base, "white")
                else:
                    color = "white"

                ax.broken_barh([(xpos, 2)], (ypos, 2), facecolors=color, alpha=0.2)
                ax.text(xpos + 1, ypos + 1, base, size=1, horizontalalignment='center',
                        verticalalignment='center')
                xpos += 2
            ypos += 2

        fig.savefig(outfile, dpi=300, format="pdf")
        plt.close(fig)
        print(outfile, "have finished")
--------------------------------------------------------------------------------
/CMlib/change_color.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from PyQt5 import QtWidgets
3 | from PyQt5.QtWidgets import *
4 | from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
5 | from matplotlib.backends.backend_qt5 import NavigationToolbar2QT as NavigationToolbar
6 | from matplotlib.ticker import MultipleLocator, FormatStrFormatter
7 |
8 | import matplotlib.pyplot as plt
9 | from PyQt5.QtCore import Qt
10 | import random
11 | from os import path
12 | import pandas as pd
13 | import numpy as np
14 | import os
15 |
16 |
class Changecolor(QtWidgets.QDialog):
    """
    Dialog that draws one of the deletion bar plots with a caller-supplied colour.

    Each plotting method stores the colour object, builds a matplotlib canvas
    with its navigation toolbar inside the dialog, draws the plot and shows
    the dialog.
    """

    def __init__(self, parent=None):
        super(Changecolor, self).__init__(parent)

    def _prepare_canvas(self, colorstring, figsize):
        """Store the colour and create the figure, canvas, toolbar and layout used by every plot."""
        self.color = colorstring

        self.figure = plt.figure(figsize=figsize)
        self.canvas = FigureCanvas(self.figure)
        self.toolbar = NavigationToolbar(self.canvas, self)

        self.layout = QtWidgets.QVBoxLayout()
        for widget in (self.toolbar, self.canvas):
            self.layout.addWidget(widget)

    def deletion_ratio(self, sample, reg, regPAM, colorstring):
        """
        Plot the per-position deletion ratio of a single sample.

        :param sample: text used as plot title
        :param reg: table providing the index (x positions) and the ratio and label columns
        :param regPAM: table whose index and label columns give the red major ticks
        :param colorstring: object whose ``name()`` result is used as bar colour
        """
        self._prepare_canvas(colorstring, (8, 6))

        glabels = list(reg.fillna(" ").label)
        bold_arial = {'family': 'Arial', 'weight': 'bold'}

        axes = self.figure.add_subplot(111)
        self.ax = axes
        axes.bar(reg.index, reg.ratio, color=self.color.name())
        axes.set_title(sample, fontdict={'family': 'Arial'}, size=15)
        # minor=True addresses the minor ticks; the major ticks carry the regPAM labels.
        axes.set_xticks(reg.index, minor=True)
        axes.set_xticklabels(glabels, color="black", minor=True, fontdict=bold_arial, size=12)
        axes.set_xticks(regPAM.index)
        axes.set_xticklabels(regPAM.label, color="red", fontdict=bold_arial, size=12)
        # NOTE(review): plotfigures.Window labels the same axis 'Deletion Ratio (%)' - confirm which is intended.
        plt.ylabel('Deletion Ratio', fontdict={'family': 'Arial'}, size=15)
        self.setLayout(self.layout)
        self.show()

    def deletion_group_ratio(self, groupname, regmean, stdrr, glabels, regPAM, regck, y_ck, ckname, colorstring):
        """
        Plot the mean deletion ratio of a group (left) next to its control (right).

        :param groupname: title of the left subplot, also printed
        :param regmean: bar heights of the left subplot; its index gives the x positions
        :param stdrr: error-bar sizes for the left subplot
        :param glabels: labels of the minor ticks of both subplots
        :param regPAM: table whose index and label columns give the red major ticks
        :param regck: table whose index gives the x positions of the right subplot
        :param y_ck: bar heights of the right subplot
        :param ckname: title of the right subplot
        :param colorstring: object whose ``name()`` result is used as bar colour
        """
        print(groupname)
        self._prepare_canvas(colorstring, (16, 6))

        times = {'family': 'Times New Roman'}
        bold_arial = {'family': 'Arial', 'weight': 'bold'}

        left = self.figure.add_subplot(1, 2, 1)
        self.ax0 = left
        left.bar(regmean.index, regmean, color=self.color.name())
        # Error bars only: thin lines with small caps and no line connecting the points.
        left.errorbar(regmean.index, regmean, yerr=stdrr, fmt='', elinewidth=0.5, capsize=2, capthick=0.5,
                      ls='None', ecolor='black')
        left.set_title(groupname, fontdict=times, size=15)

        left.set_xticks(regmean.index, minor=True)
        left.set_xticklabels(glabels, color="black", minor=True, fontdict=bold_arial, size=12)
        left.set_xticks(regPAM.index)
        left.set_xticklabels(regPAM.label, color="red", fontdict=bold_arial, size=12)

        # NOTE(review): plotfigures.Window labels the same axis 'Deletion Ratio (%)' - confirm which is intended.
        left.set_ylabel('Deletion Ratio', fontdict=times, size=15)

        right = self.figure.add_subplot(1, 2, 2)
        self.ax1 = right
        # Give the control subplot the axis range of the group subplot.
        right.axis(left.axis())

        right.bar(regck.index, y_ck, color='grey')
        right.set_title(ckname, fontdict=times, size=15)
        right.set_xticks(regck.index, minor=True)
        right.set_xticklabels(glabels, color="black", minor=True, fontdict=bold_arial, size=12)
        right.set_xticks(regPAM.index)
        right.set_xticklabels(regPAM.label, color="red", fontdict=bold_arial, size=12)

        self.setLayout(self.layout)
        self.show()

    def deletion_size(self, groupname, x, sizereg, colorstring):
        """
        Plot the deletion-size bars of a group with error bars.

        :param groupname: text used as plot title
        :param x: x positions of the bars and ticks
        :param sizereg: table providing the ratio_mean, ratio_stdrr and Index columns
        :param colorstring: object whose ``name()`` result is used as bar colour
        """
        self._prepare_canvas(colorstring, (8, 6))

        times = {'family': 'Times New Roman'}

        axes = self.figure.add_subplot(111)
        self.ax = axes
        # One decimal place on the y axis tick labels.
        axes.yaxis.set_major_formatter(FormatStrFormatter('%1.1f'))
        axes.bar(x, sizereg.ratio_mean, color=self.color.name())
        # Error bars only: thin lines with small caps and no line connecting the points.
        axes.errorbar(x, sizereg.ratio_mean, yerr=sizereg.ratio_stdrr, fmt='', elinewidth=0.5, capsize=2,
                      capthick=0.5, ls='None', ecolor='black')
        axes.set_title(groupname, size=15, fontdict=times)
        axes.set_ylabel('Deletion Size (%)', size=15, fontdict=times)
        axes.set_xticks(x)
        axes.set_xticklabels(sizereg.Index, rotation=35, fontdict={'family': 'Arial'}, size=12)

        self.setLayout(self.layout)
        self.show()
126 |
if __name__ == '__main__':
    # Stand-alone start: show an empty Changecolor dialog and run the Qt event loop.
    qt_app = QtWidgets.QApplication(sys.argv)
    dialog = Changecolor()
    dialog.setWindowTitle('Change Color Window')
    dialog.show()
    exit_code = qt_app.exec_()
    sys.exit(exit_code)
--------------------------------------------------------------------------------
/merge.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import pandas as pd
3 | import matplotlib.pyplot as plt
4 |
5 | from PyQt5 import uic,QtWidgets
6 | from PyQt5.QtGui import QStandardItemModel, QStandardItem
7 | from PyQt5.QtWidgets import QHeaderView
8 | import os
9 |
10 | from subprocess import Popen, PIPE
11 |
# Start directory offered by the file dialogs: the directory the tool was launched from.
path = os.getcwd()
# Look for the Designer file next to this script first, so the dialog also loads
# when the tool is started from another directory; the working directory stays
# as fallback.
qtCreatorFile = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'CMlib', 'flash_merge.ui')
if not os.path.isfile(qtCreatorFile):
    qtCreatorFile = os.path.join(path, 'CMlib/flash_merge.ui')

# loadUiType returns the generated form class and its Qt base class.
Ui_showtable, QtBaseClass = uic.loadUiType(qtCreatorFile)
16 |
class showtable(QtWidgets.QDialog, Ui_showtable):
    """Dialog that merges a pair of FASTQ files by running the external ``flash`` program."""

    def __init__(self):
        QtWidgets.QDialog.__init__(self)
        Ui_showtable.__init__(self)
        self.setupUi(self)

        # Selections start empty so merge() can validate them instead of
        # failing with AttributeError when nothing was chosen yet.
        self.leftfastq = ""
        self.rightfastq = ""
        self.outputdirpath = ""
        self.outputfiledir = ""

        self.setWindowTitle('Merge FastQ')
        self.leftbtn.clicked.connect(lambda: self.getfastq("left"))
        self.left.setReadOnly(True)  # path fields are filled by the dialogs only
        self.rightbtn.clicked.connect(lambda: self.getfastq("right"))
        self.right.setReadOnly(True)
        self.outputbtn.clicked.connect(self.outputdir)
        self.output.setReadOnly(True)
        self.pushButton.clicked.connect(self.merge)

    def getfastq(self, file):
        """
        Ask for a FASTQ file and store it as the left or right input.

        :param file: "left" or "right", selects which input is set
        """
        fastqPath, _ = QtWidgets.QFileDialog.getOpenFileName(self, 'Open file', path)
        if fastqPath == "":  # dialog cancelled
            return
        if file == "left":
            print("fastq Direction", fastqPath)
            self.left.setText(fastqPath)
            self.leftfastq = fastqPath
        if file == "right":
            print("fastq Direction", fastqPath)
            self.right.setText(fastqPath)
            self.rightfastq = fastqPath

    def outputdir(self):
        """Ask for the output directory and store it."""
        outputdirpath = QtWidgets.QFileDialog.getExistingDirectory(self, 'open directory', path)
        if outputdirpath != "":
            print("Direction", outputdirpath)
            self.outputdirpath = outputdirpath
            self.output.setText(outputdirpath)
            self.outputfiledir = outputdirpath

    def merge(self):
        """
        Run flash on the selected files and report the outcome.

        The flash output is printed and also written to ``<name>_flash.log``
        in the output directory. A warning box is shown when an input is
        missing, when flash cannot be found or started, or when it exits with
        a non-zero status.
        """
        from subprocess import STDOUT

        outname = self.name.text().rstrip()
        threadnumber = self.spinBox.value()
        if not (outname and self.leftfastq and self.rightfastq and self.outputfiledir):
            self.showMessageBox("warning", "Please set output name, both fastq files and output directory!")
            return

        flashpath = self.which('flash')
        if not flashpath or self.flash('flash') == 'None':
            self.showMessageBox("warning", "Please input flash directory")
            return

        flashbin = flashpath[0]
        # Argument list without a shell, so paths containing spaces stay intact.
        flashcmd = [flashbin, '-o', outname, '-t', str(threadnumber), '-d', self.outputfiledir,
                    self.leftfastq, self.rightfastq]
        print(' '.join(flashcmd))
        logname = os.path.join(self.outputfiledir, outname + '_flash.log')
        try:
            with open(logname, 'wb') as logfile:
                runflash = Popen(flashcmd, stdout=PIPE, stderr=STDOUT)
                # Same effect as `2>&1 | tee log`: echo every line and keep it in the log.
                for line in runflash.stdout:
                    print(line.decode('utf-8', errors='replace'), end='')
                    logfile.write(line)
                runflash.wait()
        except OSError as err:
            self.showMessageBox("warning", "flash could not be run: " + str(err))
            return

        if runflash.returncode != 0:
            self.showMessageBox("warning", "flash failed, please check " + logname)
            return

        msgBox = QtWidgets.QMessageBox()
        msgBox.setWindowTitle("Information")
        msgBox.setIcon(QtWidgets.QMessageBox.Information)
        msgBox.setText("Project Done!")
        msgBox.setDetailedText(''.join(['File ',outname,'.extendedFrags.fastq ','is located in ',self.outputfiledir,'/']))
        msgBox.setStandardButtons(QtWidgets.QMessageBox.Ok)
        msgBox.exec_()

    def flash(self, filename):
        """
        Return the first line printed by ``<filename> --version``.

        :param filename: program name looked up with which()
        :return: first line of the version output, or the string 'None' when
                 the program is not found, cannot be started or prints nothing
        """
        flashpath = self.which(filename)
        if not flashpath:
            return 'None'
        try:
            flashrun = Popen([flashpath[0], '--version'], stdout=PIPE, stderr=PIPE)
            # communicate() drains both pipes, so a chatty stderr cannot block the child.
            stdout, _ = flashrun.communicate()
        except OSError:
            return 'None'
        lines = stdout.decode('utf-8', errors='replace').splitlines()
        return lines[0] if lines else 'None'

    def which(self, filename):
        """
        Find a program in the directories of the PATH environment variable.

        :param filename: file name to look for
        :return: list of matching executable files, in PATH order (may be empty)
        """
        locations = os.environ.get("PATH", "").split(os.pathsep)
        candidates = []
        for location in locations:
            candidate = os.path.join(location, filename)
            if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
                candidates.append(candidate)
        return candidates

    # ############## warning message #########
    def showMessageBox(self, title, message):
        """Show a modal warning box with an OK button."""
        msgBox = QtWidgets.QMessageBox()
        msgBox.setIcon(QtWidgets.QMessageBox.Warning)
        msgBox.setWindowTitle(title)
        msgBox.setText(message)
        msgBox.setStandardButtons(QtWidgets.QMessageBox.Ok)
        msgBox.exec_()
116 | ##################################################
117 |
118 | # self.checkbtn.clicked.connect(self.sampleEdit)
119 | # print("open group table")
120 | #
121 | # def setuptable(self,pd):
122 | #
123 | # self.df=pd
124 | # rown = len(self.df.index)
125 | # coln = len(self.df.columns)
126 | # self.model = QStandardItemModel(rown, 8)
127 | # # labels = list(self.df.columns.values)
128 | # # rown=len(self.df.index)
129 | # # self.model = QStandardItemModel(rown,9)
130 | # labels=['group','rep1','rep2','control','gene','strand','start','end']
131 | # self.model.setHorizontalHeaderLabels(labels)
132 | # # self.tableView.resize(500,300)
133 | # #下面代码让表格100填满窗口
134 | # self.tableView.horizontalHeader().setStretchLastSection(True)
135 | # self.tableView.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)
136 | #
137 | # for row in range(rown):
138 | # #print(self.df.loc[row].Sample)
139 | # for column in range(8):
140 | # item = QStandardItem(str(self.df.loc[row][labels[column]]))
141 | # self.model.setItem(row, column, item)
142 | #
143 | # self.tableView.setModel(self.model)
144 | #
145 | # def sampleEdit(self):
146 | # self.close() ## 关闭窗口
147 |
148 |
149 |
150 |
if __name__ == "__main__":
    # Stand-alone start: show the merge dialog and run the Qt event loop.
    qt_app = QtWidgets.QApplication(sys.argv)
    dialog = showtable()
    dialog.show()
    exit_code = qt_app.exec_()
    sys.exit(exit_code)
--------------------------------------------------------------------------------
/split.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import pandas as pd
3 | import matplotlib.pyplot as plt
4 |
5 | from PyQt5 import uic,QtWidgets
6 | from CMlib.show_barcodestable import showtable as showbarcode
7 | from multiprocessing import Pool
8 | from functools import partial
9 | from CMlib.split_fastq import split_fastq
10 | from PyQt5.QtWidgets import QHeaderView, QPushButton,QProgressDialog
11 | from PyQt5.QtCore import Qt, QBasicTimer
12 | import os
13 |
14 | from subprocess import Popen, PIPE
15 |
# Start directory offered by the file dialogs: the directory the tool was launched from.
path = os.getcwd()
# Look for the Designer file next to this script first, so the dialog also loads
# when the tool is started from another directory; the working directory stays
# as fallback.
qtCreatorFile = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'CMlib', 'split_lanes.ui')
if not os.path.isfile(qtCreatorFile):
    qtCreatorFile = os.path.join(path, 'CMlib/split_lanes.ui')

# loadUiType returns the generated form class and its Qt base class.
Ui_showtable, QtBaseClass = uic.loadUiType(qtCreatorFile)
20 |
class showtable(QtWidgets.QDialog, Ui_showtable):
    """
    Dialog that splits a FASTQ file into one part per row of a barcode table.

    State kept on the instance:
    path2          selected FASTQ file ("" when none)
    path1          selected barcode CSV ("" when none)
    path1check     copy of path1 once the barcode table window was opened successfully
    edit           first value returned by the barcode window's resulttest()
    outputfiledir  selected output directory ("" when none)
    """

    def __init__(self):
        QtWidgets.QDialog.__init__(self)
        Ui_showtable.__init__(self)
        self.setupUi(self)

        self.setWindowTitle('Split FastQ')
        self.resize(500,400)
        self.fastqbtn.clicked.connect(self.getfastq)
        self.fastqline.setReadOnly(True)  # path fields are filled by the dialogs only
        self.barcodebtn.clicked.connect(self.barcodeinfo)
        self.barcodeline.setReadOnly(True)
        self.outputbtn.clicked.connect(self.outputdir)
        self.outputline.setReadOnly(True)

        self.showbtn.clicked.connect(self.showtable)
        self.splitbtn.clicked.connect(self.split)
        self.resetbtn.clicked.connect(self.reset)

        self.path1 = ""
        self.path2 = ""
        self.path1check = ""
        self.edit = ""
        # Initialised here so split() can test them instead of raising
        # AttributeError when no output directory was chosen.
        self.outputdirpath = ""
        self.outputfiledir = ""

    def getfastq(self):
        """Ask for the FASTQ file to split and store its path."""
        fastqPath, _ = QtWidgets.QFileDialog.getOpenFileName(self, 'Open file', path)
        if fastqPath != "":
            print("fastq Direction", fastqPath)
            self.fastqline.setText(fastqPath)
            self.path2 = fastqPath

    def barcodeinfo(self):
        """Ask for the barcode CSV, load it with pandas and store its path."""
        barcodepath, _ = QtWidgets.QFileDialog.getOpenFileName(self, 'Open file', path)
        if barcodepath != "":
            print("Direction", barcodepath)
            self.barcodeline.setText(barcodepath)
            self.dfbarcode = pd.read_csv(str(barcodepath))
            self.path1 = barcodepath

    def outputdir(self):
        """Ask for the output directory and store it."""
        outputdirpath = QtWidgets.QFileDialog.getExistingDirectory(self, 'open directory', path)
        if outputdirpath != "":
            print("Direction", outputdirpath)
            self.outputdirpath = outputdirpath
            self.outputline.setText(outputdirpath)
            self.outputfiledir = outputdirpath

    def showtable(self):
        """Open the barcode table window for checking; requires a loaded barcode CSV."""
        if self.path1 == "":
            self.showMessageBox('Warning', 'Please load Sample information Table first')
            self.path1check = ""
            return

        self.ui = showbarcode()  # new barcode table window
        result = self.ui.setuptable(self.dfbarcode)  # hand over the loaded CSV

        if result == "yes":
            self.ui.show()
            self.path1check = self.path1
        else:
            self.path1check = ""
            self.path1 = ""

    def reset(self):
        """Forget every selection made so far."""
        self.path1 = ""
        self.path2 = ""
        self.path1check = ""
        self.edit = ""
        # The reset button also clears the output field of the form, so the
        # stored directory must not survive either.
        self.outputdirpath = ""
        self.outputfiledir = ""

    def split(self):
        """
        Split the FASTQ file, one split_fastq call per barcode table row.

        Requires a FASTQ file, a barcode table that was opened and confirmed in
        the barcode window, and an output directory; a warning box names the
        first missing item. The rows are processed by a pool of four worker
        processes.
        """
        if self.path2 == "":
            self.showMessageBox('Warning', 'Please load fastq file first')
            return
        if self.path1check == "":
            self.showMessageBox('Warning', 'Please click show buttons for information checking and confirming')
            return
        if self.outputfiledir == "":
            self.showMessageBox('Warning', 'Please select output directory first')
            return

        self.edit, self.newdfbarcode = self.ui.resulttest()  # was the table confirmed?
        if self.edit != "yes":
            self.showMessageBox('Warning', 'Please click "Confirm" button for barcodes checking')
            self.edit = ""
            return

        self.showbarprocess("Prepare for splitting...")

        # The with-block releases the worker processes once map() has returned.
        with Pool(4) as pool:
            pool.map(partial(split_fastq, df=self.newdfbarcode, fastq=self.path2, output=self.outputfiledir),
                     list(self.newdfbarcode.index))
        print('done')
        self.showfinishBox('Information','The project have been done!')

    # ############## warning message #########
    def showMessageBox(self, title, message):
        """Show a modal warning box with an OK button."""
        msgBox = QtWidgets.QMessageBox()
        msgBox.setIcon(QtWidgets.QMessageBox.Warning)
        msgBox.setWindowTitle(title)
        msgBox.setText(message)
        msgBox.setStandardButtons(QtWidgets.QMessageBox.Ok)
        msgBox.exec_()

    def showfinishBox(self, title, message):
        """Show a modal information box announcing that the run has finished."""
        msgBox = QtWidgets.QMessageBox()
        msgBox.setWindowTitle(title)
        msgBox.setIcon(QtWidgets.QMessageBox.Information)
        msgBox.setText(message)
        msgBox.setDetailedText("The project has finished, please check the result!")
        msgBox.exec_()

    def showbarprocess(self, content):
        """
        Show a progress dialog that counts from 0 to 100000 and then closes.

        The counter is not tied to any work; the dialog only signals that
        processing is about to start.

        :param content: label text of the progress dialog
        """
        num = int(100000)
        progress = QProgressDialog(parent=self)
        progress.setWindowTitle("Start Processing ...")
        progress.setLabelText(content)
        progress.setCancelButton(None)  # no cancel button
        progress.setMinimumDuration(5)
        progress.setWindowModality(Qt.WindowModal)
        progress.setRange(0, num)

        for i in range(num):
            progress.setValue(i)
        progress.setValue(num)

        progress.cancel()  # close the dialog right away
171 |
172 |
if __name__ == "__main__":
    # Stand-alone start: show the split dialog and run the Qt event loop.
    qt_app = QtWidgets.QApplication(sys.argv)
    dialog = showtable()
    dialog.show()
    exit_code = qt_app.exec_()
    sys.exit(exit_code)
--------------------------------------------------------------------------------
/CMlib/plotfigures.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from PyQt5 import QtWidgets
3 | from PyQt5.QtWidgets import *
4 | from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
5 | from matplotlib.backends.backend_qt5 import NavigationToolbar2QT as NavigationToolbar
6 | from matplotlib.ticker import MultipleLocator, FormatStrFormatter
7 |
8 | import matplotlib.pyplot as plt
9 | from PyQt5.QtCore import Qt
10 | from CMlib.change_color import Changecolor
11 | import random
12 | from os import path
13 | import pandas as pd
14 | import numpy as np
15 | import os
16 |
17 |
class Window(QtWidgets.QDialog):
    """
    Dialog showing a deletion bar plot together with a 'Color' button.

    Each plotting method builds a matplotlib canvas with its navigation toolbar
    and the button inside the dialog, draws the plot and shows the dialog. The
    button asks for a colour and opens a Changecolor dialog that draws the same
    data in that colour.
    """

    def __init__(self, parent=None):
        super(Window, self).__init__(parent)

    def _prepare_canvas(self, figsize):
        """Create the button, figure, canvas, toolbar and layout used by every plot."""
        self.button = QPushButton('Color', self)
        self.button.move(20, 20)

        self.figure = plt.figure(figsize=figsize)
        self.canvas = FigureCanvas(self.figure)
        self.toolbar = NavigationToolbar(self.canvas, self)

        self.layout = QtWidgets.QVBoxLayout()
        for widget in (self.toolbar, self.canvas, self.button):
            self.layout.addWidget(widget)

    ############ deletion ratio bar ###################################
    def deletion_ratio(self, sample, reg, regPAM):
        """
        Plot the per-position deletion ratio of a single sample in blue.

        :param sample: text used as plot title
        :param reg: table providing the index (x positions) and the ratio and label columns
        :param regPAM: table whose index and label columns give the red major ticks
        """
        self._prepare_canvas((8, 6))

        glabels = list(reg.fillna(" ").label)
        bold_arial = {'family': 'Arial', 'weight': 'bold'}

        self.button.clicked.connect(lambda: self.showDialog(sample,reg,regPAM))
        axes = self.figure.add_subplot(111)
        self.ax = axes
        axes.bar(reg.index, reg.ratio, color='blue')
        axes.set_title(sample, fontdict={'family': 'Arial'}, size=15)
        # minor=True addresses the minor ticks; the major ticks carry the regPAM labels.
        axes.set_xticks(reg.index, minor=True)
        axes.set_xticklabels(glabels, color="black", minor=True, fontdict=bold_arial, size=12)
        axes.set_xticks(regPAM.index)
        axes.set_xticklabels(regPAM.label, color="red", fontdict=bold_arial, size=12)
        plt.ylabel('Deletion Ratio (%)', fontdict={'family': 'Arial'}, size=15)
        self.setLayout(self.layout)
        self.show()

    def showDialog(self, sample, reg, regPAM):
        """Ask for a colour and, if one was chosen, redraw the sample plot in a Changecolor dialog."""
        self.color = QColorDialog.getColor()
        if not self.color.isValid():  # colour dialog cancelled
            return
        self.ui = Changecolor()
        self.ui.deletion_ratio(sample, reg, regPAM, self.color)
        self.ui.show()
    #######################################################################

    ############ deletion group ratio bar ###################################
    def deletion_group_ratio(self, groupname, regmean, stdrr, glabels, regPAM, regck, y_ck, ckname):
        """
        Plot the mean deletion ratio of a group (left, purple) next to its control (right, grey).

        :param groupname: title of the left subplot, also printed
        :param regmean: bar heights of the left subplot; its index gives the x positions
        :param stdrr: error-bar sizes for the left subplot
        :param glabels: labels of the minor ticks of both subplots
        :param regPAM: table whose index and label columns give the red major ticks
        :param regck: table whose index gives the x positions of the right subplot
        :param y_ck: bar heights of the right subplot
        :param ckname: title of the right subplot
        """
        print(groupname)
        self._prepare_canvas((16, 6))

        times = {'family': 'Times New Roman'}
        bold_arial = {'family': 'Arial', 'weight': 'bold'}

        self.button.clicked.connect(lambda: self.changec_group(groupname,regmean,stdrr,glabels,regPAM,regck,y_ck,ckname))

        left = self.figure.add_subplot(1, 2, 1)
        self.ax0 = left
        left.bar(regmean.index, regmean, color='purple')
        # Error bars only: thin lines with small caps and no line connecting the points.
        left.errorbar(regmean.index, regmean, yerr=stdrr, fmt='', elinewidth=0.5, capsize=2, capthick=0.5,
                      ls='None', ecolor='black')
        left.set_title(groupname, fontdict=times, size=15)

        left.set_xticks(regmean.index, minor=True)
        left.set_xticklabels(glabels, color="black", minor=True, fontdict=bold_arial, size=12)
        left.set_xticks(regPAM.index)
        left.set_xticklabels(regPAM.label, color="red", fontdict=bold_arial, size=12)

        left.set_ylabel('Deletion Ratio (%)', fontdict=times, size=15)

        right = self.figure.add_subplot(1, 2, 2)
        self.ax1 = right
        # Give the control subplot the axis range of the group subplot.
        right.axis(left.axis())

        right.bar(regck.index, y_ck, color='grey')
        right.set_title(ckname, fontdict=times, size=15)
        right.set_xticks(regck.index, minor=True)
        right.set_xticklabels(glabels, color="black", minor=True, fontdict=bold_arial, size=12)
        right.set_xticks(regPAM.index)
        right.set_xticklabels(regPAM.label, color="red", fontdict=bold_arial, size=12)

        self.setLayout(self.layout)
        self.show()

    def changec_group(self, groupname, regmean, stdrr, glabels, regPAM, regck, y_ck, ckname):
        """Ask for a colour and, if one was chosen, redraw the group plot in a Changecolor dialog."""
        self.color = QColorDialog.getColor()
        if not self.color.isValid():  # colour dialog cancelled
            return
        self.ui = Changecolor()
        self.ui.deletion_group_ratio(groupname, regmean, stdrr, glabels, regPAM, regck, y_ck, ckname, self.color)
        self.ui.show()
    #######################################################################

    ############ deletion size bar ###################################
    def deletion_size(self, groupname, x, sizereg):
        """
        Plot the deletion-size bars of a group in red, with error bars.

        :param groupname: text used as plot title, also printed
        :param x: x positions of the bars and ticks
        :param sizereg: table providing the ratio_mean, ratio_stdrr and Index columns
        """
        print(groupname)
        self._prepare_canvas((8, 6))

        times = {'family': 'Times New Roman'}

        self.button.clicked.connect(lambda: self.changec_size(groupname, x, sizereg))

        axes = self.figure.add_subplot(111)
        self.ax = axes
        # One decimal place on the y axis tick labels.
        axes.yaxis.set_major_formatter(FormatStrFormatter('%1.1f'))
        axes.bar(x, sizereg.ratio_mean, color='red')
        # Error bars only: thin lines with small caps and no line connecting the points.
        axes.errorbar(x, sizereg.ratio_mean, yerr=sizereg.ratio_stdrr, fmt='', elinewidth=0.5, capsize=2,
                      capthick=0.5, ls='None', ecolor='black')
        axes.set_title(groupname, size=15, fontdict=times)
        axes.set_ylabel('Deletion Size (%)', size=15, fontdict=times)
        axes.set_xticks(x)
        axes.set_xticklabels(sizereg.Index, rotation=35, fontdict={'family': 'Arial'}, size=12)

        self.setLayout(self.layout)
        self.show()

    def changec_size(self, groupname, x, sizereg):
        """Ask for a colour and, if one was chosen, redraw the size plot in a Changecolor dialog."""
        self.color = QColorDialog.getColor()
        if not self.color.isValid():  # colour dialog cancelled
            return
        self.ui = Changecolor()
        self.ui.deletion_size(groupname, x, sizereg, self.color)
        self.ui.show()
161 |
162 | #######################################################################
163 |
if __name__ == '__main__':
    # Stand-alone start: show an empty plot dialog and run the Qt event loop.
    qt_app = QtWidgets.QApplication(sys.argv)
    dialog = Window()
    dialog.setWindowTitle('Bar plot')
    dialog.show()
    exit_code = qt_app.exec_()
    sys.exit(exit_code)
--------------------------------------------------------------------------------
/CMlib/start.bak.ui:
--------------------------------------------------------------------------------
1 |
2 |
3 | mainWindow
4 |
5 |
6 |
7 | 0
8 | 0
9 | 800
10 | 600
11 |
12 |
13 |
14 | MainWindow
15 |
16 |
17 |
18 |
19 |
20 | 290
21 | 10
22 | 231
23 | 51
24 |
25 |
26 |
27 |
28 | Arial
29 | 24
30 | 75
31 | true
32 |
33 |
34 |
35 | CRISPRMatch Start
36 |
37 |
38 | Qt::AlignCenter
39 |
40 |
41 |
42 |
43 |
44 | 60
45 | 410
46 | 231
47 | 51
48 |
49 |
50 |
51 |
52 | Arial
53 | 15
54 | 75
55 | true
56 |
57 |
58 |
59 | 5. Processing info
60 |
61 |
62 |
63 |
64 |
65 | 100
66 | 458
67 | 471
68 | 101
69 |
70 |
71 |
72 |
73 |
74 |
75 | 620
76 | 490
77 | 113
78 | 32
79 |
80 |
81 |
82 |
83 | Arial
84 | 20
85 |
86 |
87 |
88 | background-color: rgb(189, 53, 35);
89 |
90 |
91 | Start
92 |
93 |
94 |
95 |
96 |
97 | 340
98 | 120
99 | 231
100 | 211
101 |
102 |
103 |
104 | -
105 |
106 |
-
107 |
108 |
109 | Load
110 |
111 |
112 |
113 | -
114 |
115 |
116 | Show
117 |
118 |
119 |
120 |
121 |
122 | -
123 |
124 |
-
125 |
126 |
127 | Load
128 |
129 |
130 |
131 | -
132 |
133 |
134 | Show
135 |
136 |
137 |
138 |
139 |
140 | -
141 |
142 |
-
143 |
144 |
145 | Load
146 |
147 |
148 |
149 | -
150 |
151 |
152 | Show
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 | 100
164 | 370
165 | 202
166 | 24
167 |
168 |
169 |
170 | -
171 |
172 |
173 | Input
174 |
175 |
176 |
177 | -
178 |
179 |
180 | -
181 |
182 |
183 | ...
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 | 360
193 | 370
194 | 214
195 | 24
196 |
197 |
198 |
199 | -
200 |
201 |
202 | Output
203 |
204 |
205 |
206 | -
207 |
208 |
209 | -
210 |
211 |
212 | ...
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 |
221 | 60
222 | 100
223 | 231
224 | 261
225 |
226 |
227 |
228 | -
229 |
230 |
231 |
232 | Arial
233 | 15
234 | 75
235 | true
236 |
237 |
238 |
239 | 1. Load Sample Information
240 |
241 |
242 |
243 | -
244 |
245 |
246 |
247 | Arial
248 | 15
249 | 75
250 | true
251 |
252 |
253 |
254 | 2. Load Gene Sequence
255 |
256 |
257 |
258 | -
259 |
260 |
261 |
262 | Arial
263 | 15
264 | 75
265 | true
266 |
267 |
268 |
269 | 3. Load Group Information
270 |
271 |
272 |
273 | -
274 |
275 |
276 |
277 | Arial
278 | 15
279 | 75
280 | true
281 |
282 |
283 |
284 | 4. Input and Output Directory
285 |
286 |
287 |
288 |
289 |
290 |
291 |
292 |
293 |
294 |
295 |
296 |
--------------------------------------------------------------------------------
/CMlib/split_lanes.ui:
--------------------------------------------------------------------------------
1 |
2 |
3 | Dialog
4 |
5 |
6 |
7 | 0
8 | 0
9 | 485
10 | 403
11 |
12 |
13 |
14 |
15 | 0
16 | 0
17 |
18 |
19 |
20 | Dialog
21 |
22 |
23 |
24 |
25 | 12
26 | 12
27 | 471
28 | 21
29 |
30 |
31 |
32 |
33 | 0
34 | 0
35 |
36 |
37 |
38 |
39 | Arial
40 | 18
41 | 75
42 | true
43 |
44 |
45 |
46 | Sample Split
47 |
48 |
49 | Qt::AlignCenter
50 |
51 |
52 |
53 |
54 |
55 | 12
56 | 41
57 | 461
58 | 281
59 |
60 |
61 |
62 | -
63 |
64 |
-
65 |
66 |
67 |
68 | Arial
69 | 13
70 | 75
71 | true
72 |
73 |
74 |
75 | 1. Load merged fastq
76 |
77 |
78 |
79 | -
80 |
81 |
-
82 |
83 |
84 |
85 | 14
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 | -
94 |
95 |
96 | ...
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 | -
105 |
106 |
-
107 |
108 |
109 |
110 | Arial
111 | 13
112 | 75
113 | true
114 |
115 |
116 |
117 | 2. Load barcode table
118 |
119 |
120 |
121 | -
122 |
123 |
-
124 |
125 |
126 |
127 | 14
128 |
129 |
130 |
131 |
132 | -
133 |
134 |
135 | ...
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 | -
144 |
145 |
-
146 |
147 |
148 |
149 | Arial
150 | 13
151 | 75
152 | true
153 |
154 |
155 |
156 | 3. Output directory
157 |
158 |
159 |
160 | -
161 |
162 |
-
163 |
164 |
165 |
166 | 14
167 |
168 |
169 |
170 |
171 | -
172 |
173 |
174 | ...
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 | 30
188 | 350
189 | 421
190 | 33
191 |
192 |
193 |
194 | -
195 |
196 |
197 |
198 | 0
199 | 0
200 |
201 |
202 |
203 |
204 | Arial
205 | 15
206 |
207 |
208 |
209 | background-color: rgb(255, 255, 255);
210 | color: rgb(25, 25, 25);
211 |
212 |
213 | Barcodes
214 |
215 |
216 |
217 | -
218 |
219 |
220 |
221 | 0
222 | 0
223 |
224 |
225 |
226 |
227 | Arial
228 | 15
229 |
230 |
231 |
232 | background-color: rgb(230, 230, 230);
233 | color: rgb(252, 1, 7);
234 |
235 |
236 | Split Now!
237 |
238 |
239 |
240 | -
241 |
242 |
243 |
244 | 0
245 | 0
246 |
247 |
248 |
249 |
250 | Arial
251 | 15
252 |
253 |
254 |
255 | background-color: rgb(255, 255, 255);
256 |
257 |
258 | Reset
259 |
260 |
261 |
262 |
263 |
264 |
265 |
266 |
267 |
268 | resetbtn
269 | clicked()
270 | fastqline
271 | clear()
272 |
273 |
274 | 373
275 | 352
276 |
277 |
278 | 359
279 | 105
280 |
281 |
282 |
283 |
284 | resetbtn
285 | clicked()
286 | barcodeline
287 | clear()
288 |
289 |
290 | 384
291 | 358
292 |
293 |
294 | 385
295 | 205
296 |
297 |
298 |
299 |
300 | resetbtn
301 | clicked()
302 | outputline
303 | clear()
304 |
305 |
306 | 307
307 | 364
308 |
309 |
310 | 308
311 | 301
312 |
313 |
314 |
315 |
316 |
317 |
--------------------------------------------------------------------------------
/CMlib/start.ui:
--------------------------------------------------------------------------------
1 |
2 |
3 | mainWindow
4 |
5 |
6 |
7 | 0
8 | 0
9 | 600
10 | 500
11 |
12 |
13 |
14 | MainWindow
15 |
16 |
17 |
18 |
19 |
20 | 0
21 | 0
22 | 601
23 | 51
24 |
25 |
26 |
27 |
28 | 0
29 | 0
30 |
31 |
32 |
33 |
34 | Arial
35 | 18
36 | 75
37 | true
38 |
39 |
40 |
41 | CRISPRMatch Start
42 |
43 |
44 | Qt::AlignCenter
45 |
46 |
47 |
48 |
49 |
50 | 20
51 | 320
52 | 281
53 | 51
54 |
55 |
56 |
57 |
58 | Arial
59 | 13
60 | 75
61 | true
62 |
63 |
64 |
65 | 5. Processing info
66 |
67 |
68 |
69 |
70 |
71 | 20
72 | 360
73 | 411
74 | 111
75 |
76 |
77 |
78 |
79 |
80 |
81 | 460
82 | 390
83 | 113
84 | 32
85 |
86 |
87 |
88 |
89 | Arial
90 | 18
91 |
92 |
93 |
94 | background-color: rgb(189, 53, 35);
95 |
96 |
97 | Start
98 |
99 |
100 |
101 |
102 |
103 | 300
104 | 60
105 | 231
106 | 161
107 |
108 |
109 |
110 | -
111 |
112 |
-
113 |
114 |
115 | Load
116 |
117 |
118 |
119 | -
120 |
121 |
122 | Show
123 |
124 |
125 |
126 |
127 |
128 | -
129 |
130 |
-
131 |
132 |
133 | Load
134 |
135 |
136 |
137 | -
138 |
139 |
140 | Show
141 |
142 |
143 |
144 |
145 |
146 | -
147 |
148 |
-
149 |
150 |
151 | Load
152 |
153 |
154 |
155 | -
156 |
157 |
158 | Show
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 | 40
170 | 290
171 | 202
172 | 24
173 |
174 |
175 |
176 | -
177 |
178 |
179 | Input
180 |
181 |
182 |
183 | -
184 |
185 |
186 | -
187 |
188 |
189 | ...
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 | 310
199 | 290
200 | 214
201 | 24
202 |
203 |
204 |
205 | -
206 |
207 |
208 | Output
209 |
210 |
211 |
212 | -
213 |
214 |
215 | -
216 |
217 |
218 | ...
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 | 20
228 | 60
229 | 281
230 | 211
231 |
232 |
233 |
234 | -
235 |
236 |
237 |
238 | Arial
239 | 13
240 | 75
241 | true
242 |
243 |
244 |
245 | 1. Load Sample Information
246 |
247 |
248 |
249 | -
250 |
251 |
252 |
253 | Arial
254 | 13
255 | 75
256 | true
257 |
258 |
259 |
260 | 2. Load Gene Sequence
261 |
262 |
263 |
264 | -
265 |
266 |
267 |
268 | Arial
269 | 13
270 | 75
271 | true
272 |
273 |
274 |
275 | 3. Load Group Information
276 |
277 |
278 |
279 | -
280 |
281 |
282 |
283 | Arial
284 | 13
285 | 75
286 | true
287 |
288 |
289 |
290 | 4. Input and Output Directory
291 |
292 |
293 |
294 |
295 |
296 |
297 |
298 |
299 | 460
300 | 440
301 | 113
302 | 32
303 |
304 |
305 |
306 |
307 | Arial
308 | 18
309 |
310 |
311 |
312 |
313 |
314 |
315 | Result
316 |
317 |
318 |
319 |
320 |
321 |
322 |
323 |
324 |
--------------------------------------------------------------------------------
/CMlib/bwa.py:
--------------------------------------------------------------------------------
1 | import os
2 | from subprocess import Popen
3 | from subprocess import PIPE
4 | import re
5 | import shutil
6 | from CMlib import subprocesspath
7 | import time
8 | import signal
9 |
10 |
11 | def testbwa(bwabin):
12 | """
13 |
14 | :param bwabin: bwa bin path
15 | :return: bool, True: bwa tested ok. False: bwa error
16 | """
17 |
18 | bwacmd = [bwabin]
19 |
20 | bwarun = Popen(bwacmd, stdout=PIPE, stderr=PIPE, shell=True)
21 |
22 | # bwarun.communicate()
23 |
24 | testres = False
25 |
26 | pat = re.compile('Version')
27 |
28 | for i in bwarun.stderr.readlines():
29 |
30 | i = i.decode('utf-8').rstrip('\n')
31 |
32 | if re.search(pat, i):
33 |
34 | testres = True
35 |
36 | bwarun.communicate()
37 |
38 | return testres
39 |
40 |
def bwaversion(bwabin):
    """
    Report the version that bwa prints when started without arguments.

    :param bwabin: bwa bin path
    :return: string, version of bwa; the string 'None' when no version line
             is found on stderr
    """

    bwarun = Popen([bwabin], stdout=PIPE, stderr=PIPE)

    # communicate() empties both pipes and waits for the process to exit.
    _, errdata = bwarun.communicate()

    # Take the first token after "Version"; unlike a two-way split on a single
    # blank this does not fail when the line carries extra blanks or words.
    pat = re.compile(r'Version:?\s+(\S+)')

    version = 'None'

    for line in errdata.decode('utf-8', errors='replace').splitlines():

        found = pat.search(line)

        if found:

            version = found.group(1)

    return version
67 |
def bwaindex(bwabin, reffile, samplefolder):
    """
    bwa index

    The reference is copied into the sample folder and ``bwa index`` is run
    on that copy with the sample folder as working directory.

    :param bwabin: bwa bin path
    :param reffile: reference genome file
    :param samplefolder: sample dir
    :return: no return
    """

    refsource = os.path.abspath(reffile)

    refbasename = os.path.basename(refsource)

    dscopy = os.path.join(samplefolder, refbasename)

    # shutil.copyfile raises SameFileError when source and destination are
    # the same file, i.e. when the reference already lives in samplefolder.
    if not (os.path.exists(dscopy) and os.path.samefile(refsource, dscopy)):

        shutil.copyfile(refsource, dscopy)

    # Absolute path is required because the child runs with cwd=samplefolder.
    bwabin = os.path.abspath(bwabin)

    bwacmd = [bwabin, 'index', refbasename]

    runbwaindex = Popen(bwacmd, cwd=samplefolder)

    runbwaindex.communicate()
91 |
92 |
93 |
94 |
def bwaalign(bwabin, reffile, inputfile, outfile, threadnumber=1):
    """
    bwa mem alignment

    Runs ``bwa mem -O 0 -B 0 -E 0 -k 5 -t <threads> <ref> <input>`` and
    stores its standard output in ``outfile``.

    :param bwabin: bwa bin path
    :param reffile: reference file, make by bwa index
    :param inputfile: sequence or reads file
    :param outfile: samfile
    :param threadnumber: number of threads
    :return: True (returned regardless of the bwa exit status)
    """

    # Every path is made absolute, as the shell-based command line did.
    bwacmd = [os.path.abspath(bwabin), 'mem',
              '-O', '0', '-B', '0', '-E', '0', '-k', '5',
              '-t', str(threadnumber),
              os.path.abspath(reffile), os.path.abspath(inputfile)]

    outfile = os.path.abspath(outfile)

    print(' '.join(bwacmd), '>', outfile)

    # The arguments go to bwa as a list and the output file is opened here,
    # so no shell is involved and paths containing blanks or quote characters
    # need no escaping.
    with open(outfile, 'wb') as samout:

        runbwaalign = Popen(bwacmd, stdout=samout)

        runbwaalign.communicate()

    return True
123 |
def samfilter(samfile, minas, maxxs):
    """
    Select sequences from a SAM file by their AS:i / XS:i scores.

    Header lines (starting with ``@``) are skipped, and so is every record
    that lacks either an ``AS:i:`` or an ``XS:i:`` tag.

    :param samfile: samfile
    :param minas: min AS:i score, suggest probe length
    :param maxxs: max XS:i score, suggest probe length * homology
    :return: list, list of probe/sequence (SAM column 10) of the records with
             AS >= minas and XS < maxxs
    """
    seqlist = list()

    # \d+ reads the complete number; the former "(\d.)" captured exactly two
    # characters, truncating 3-digit scores and missing a 1-digit score at
    # the end of a line.
    aspat = re.compile(r'AS:i:(\d+)')

    xspat = re.compile(r'XS:i:(\d+)')

    with open(samfile, 'r') as inio:

        for line in inio:

            line = line.rstrip('\n')

            if line.startswith('@'):

                continue

            asmatch = aspat.search(line)

            xsmatch = xspat.search(line)

            if not (asmatch and xsmatch):

                continue

            asscore = int(asmatch.group(1))

            xsscore = int(xsmatch.group(1))

            if asscore >= minas and xsscore < maxxs:

                seqlist.append(line.split('\t')[9])

    return seqlist
175 |
176 |
def stop_bwa(p=None):

    """
    Send SIGTERM to bwa.

    :param p: process object of the running bwa (only its ``pid`` attribute
              is used). When None, every process whose ``ps -A`` line
              contains "bwa" is signalled instead.
    :return: no return
    """

    if p is not None:

        try:

            os.kill(p.pid, signal.SIGTERM)

        except ProcessLookupError:

            # The process already exited; there is nothing left to stop.
            pass

        time.sleep(5)

    else:

        pslist = Popen(['ps', '-A'], stdout=PIPE)

        # communicate() reads the listing and reaps the ps process.
        psout, _ = pslist.communicate()

        # First column of each ps line is taken as the process id.
        pids = [int(line.split()[0]) for line in psout.splitlines() if b'bwa' in line]

        for pid in pids:

            try:

                os.kill(pid, signal.SIGTERM)

            except ProcessLookupError:

                # Finished between the listing and the signal.
                pass

        time.sleep(10)
209 |
210 |
def bwaloci(bwabin, reffile, inputfile, threadnumber=1):
    """
    Align sequences with bwa mem and report where each record was placed.

    :param bwabin: bwa bin path
    :param reffile: reference file, make by bwa index
    :param inputfile: sequence or reads file
    :param threadnumber: number of threads
    :return: list of strings "sequence<TAB>reference name<TAB>position",
             built from SAM columns 10, 3 and 4 of every non-header line
    """

    # Argument list instead of a quoted shell string: paths with blanks or
    # quote characters are passed to bwa unchanged.
    bwacmd = [os.path.abspath(bwabin), 'mem',
              '-O', '0', '-B', '0', '-E', '0', '-k', '5',
              '-t', str(threadnumber),
              os.path.abspath(reffile), os.path.abspath(inputfile)]

    print(' '.join(bwacmd))

    runbwaalign = Popen(bwacmd, stdout=PIPE)

    # communicate() collects the whole output and waits for bwa to exit.
    samout, _ = runbwaalign.communicate()

    res = list()

    for lin in samout.decode('utf-8').split('\n'):

        # Skip header lines and blank lines (e.g. the tail after the final
        # newline).
        if not lin or lin.startswith('@'):

            continue

        infor = lin.split('\t')

        seqname = infor[2]

        start = infor[3]

        probeseq = infor[9]

        res.append('\t'.join([probeseq, seqname, start]))

    return res
245 |
246 |
def bwafilter(bwabin, reffile, inputfile, minas, maxxs ,threadnumber=1 ):
    """
    Align sequences with bwa mem and keep records passing the score filter.

    Records without an ``AS:i:`` or ``XS:i:`` tag are dropped.

    :param bwabin: bwa bin path
    :param reffile: reference file, make by bwa index
    :param inputfile: sequence or reads file
    :param minas: min AS:i score
    :param maxxs: max XS:i score (exclusive)
    :param threadnumber: number of threads
    :return: list of strings "sequence<TAB>reference name<TAB>position" for
             records with AS >= minas and XS < maxxs
    """

    # Argument list instead of a quoted shell string: paths with blanks or
    # quote characters are passed to bwa unchanged.
    bwacmd = [os.path.abspath(bwabin), 'mem',
              '-O', '0', '-B', '0', '-E', '0', '-k', '5',
              '-t', str(threadnumber),
              os.path.abspath(reffile), os.path.abspath(inputfile)]

    print(' '.join(bwacmd))

    # \d+ reads the complete number; the former "(\d.)" captured exactly two
    # characters and therefore truncated 3-digit scores.
    aspat = re.compile(r'AS:i:(\d+)')

    xspat = re.compile(r'XS:i:(\d+)')

    runbwaalign = Popen(bwacmd, stdout=PIPE)

    samout, _ = runbwaalign.communicate()

    res = list()

    for lin in samout.decode('utf-8').split('\n'):

        if not lin or lin.startswith('@'):

            continue

        asmatch = aspat.search(lin)

        xsmatch = xspat.search(lin)

        if not (asmatch and xsmatch):

            continue

        asscore = int(asmatch.group(1))

        xsscore = int(xsmatch.group(1))

        if asscore >= minas and xsscore < maxxs:

            infor = lin.split('\t')

            seqname = infor[2]

            start = infor[3]

            probeseq = infor[9]

            res.append('\t'.join([probeseq, seqname, start]))

    return res
313 |
314 |
315 | # runbwaalign.communicate()
316 |
def bwareflength(bwabin, reffile):
    """
    Read the reference sequence lengths from the SAM header bwa produces.

    ``bwa mem <ref> -`` is fed a short dummy sequence on stdin; only the
    ``@SQ`` header lines of its output are used.

    :param bwabin: bwa bin path
    :param reffile: reference file, make by bwa index
    :return: dict, sequence name (SN tag) -> sequence length (LN tag, int)
    """

    # Argument list instead of a quoted shell string: paths with blanks or
    # quote characters are passed to bwa unchanged.
    bwacmd = [os.path.abspath(bwabin), 'mem', os.path.abspath(reffile), '-']

    runbwaalign = Popen(bwacmd, stdout=PIPE, stdin=PIPE)

    # communicate() writes the dummy read, closes stdin, collects stdout and
    # waits for bwa to exit.
    samout, _ = runbwaalign.communicate('AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'.encode('ascii'))

    seqlength = dict()

    for line in samout.decode('utf-8').split('\n'):

        if not line.startswith('@SQ'):

            continue

        seqname = None

        seqlen = None

        # Look the tags up by prefix so that additional header tags on the
        # line do not break the parsing.
        for field in line.split('\t')[1:]:

            if field.startswith('SN:'):

                seqname = field[3:]

            elif field.startswith('LN:'):

                seqlen = int(field[3:])

        if seqname is not None and seqlen is not None:

            seqlength[seqname] = seqlen

    return seqlength
352 |
353 | #echo 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA' | bwa mem Zea_mays.AGPv3.23.dna.genome.fa -
354 |
if __name__ == '__main__':

    # Manual check when the module is run directly: print the reference
    # lengths reported for the developer's local test genome.
    bwapath = '../bin/bwa/x86_64-Darwin/bwa'

    print(bwareflength(bwapath, '../Test/DM_404.fa'))
363 |
364 | # bwaalign(bwapath, '../Test/DM_404.fa', '../Test/Testsampe/oligo_tmp2.fa', '../Test/Testsampe/outfile.sam',4)
365 | # bwaindex(bwapath, '../Test/DM_404.fa', '../Test/Testsampe/')
366 |
367 | # bwapath = subprocesspath.subprocesspath(bwapath)
368 | #
369 |
370 | # seqlist = samfilter('../Test/Testsampe/outfile.sam', minas=45, maxxs=33)
371 | #
372 | # for i in seqlist:
373 | #
374 | # print(i)
375 |
376 | # tester = bwaversion(bwapath)
377 | #
378 | # print(tester)
379 | #
380 | # res = bwaloci(bwapath, '../Test/Testsampe/DM_404.fa', '../Test/Testsampe/DM_test.faprobes.fa',threadnumber=4)
381 | #
382 | # for i in res:
383 | # print(i)
--------------------------------------------------------------------------------
/CMlib/flash_merge.ui:
--------------------------------------------------------------------------------
1 |
2 |
3 | Dialog
4 |
5 |
6 |
7 | 0
8 | 0
9 | 659
10 | 236
11 |
12 |
13 |
14 |
15 | 0
16 | 0
17 |
18 |
19 |
20 | Dialog
21 |
22 |
23 | -
24 |
25 |
-
26 |
27 |
28 |
29 | 0
30 | 0
31 |
32 |
33 |
34 |
35 | Arial
36 | 15
37 | 75
38 | true
39 |
40 |
41 |
42 | 1. Load left fastq
43 |
44 |
45 |
46 | -
47 |
48 |
49 |
50 | 0
51 | 0
52 |
53 |
54 |
55 |
56 | Arial
57 | 15
58 | 75
59 | true
60 |
61 |
62 |
63 | 2. Load right fastq
64 |
65 |
66 |
67 | -
68 |
69 |
70 |
71 | 0
72 | 0
73 |
74 |
75 |
76 |
77 | Arial
78 | 15
79 | 75
80 | true
81 |
82 |
83 |
84 | 3. Set Output Directory
85 |
86 |
87 |
88 | -
89 |
90 |
91 |
92 | 0
93 | 0
94 |
95 |
96 |
97 |
98 | Arial
99 | 15
100 | 75
101 | true
102 |
103 |
104 |
105 | 4. Set Output Name
106 |
107 |
108 |
109 |
110 |
111 | -
112 |
113 |
-
114 |
115 |
-
116 |
117 |
118 |
119 | 0
120 | 0
121 |
122 |
123 |
124 |
125 | -
126 |
127 |
128 |
129 | 0
130 | 0
131 |
132 |
133 |
134 | ...
135 |
136 |
137 |
138 |
139 |
140 | -
141 |
142 |
-
143 |
144 |
145 |
146 | 0
147 | 0
148 |
149 |
150 |
151 |
152 | -
153 |
154 |
155 |
156 | 0
157 | 0
158 |
159 |
160 |
161 | ...
162 |
163 |
164 |
165 |
166 |
167 | -
168 |
169 |
-
170 |
171 |
172 |
173 | 0
174 | 0
175 |
176 |
177 |
178 |
179 | -
180 |
181 |
182 |
183 | 0
184 | 0
185 |
186 |
187 |
188 | ...
189 |
190 |
191 |
192 |
193 |
194 | -
195 |
196 |
-
197 |
198 |
199 |
200 | 0
201 | 0
202 |
203 |
204 |
205 |
206 | -
207 |
208 |
209 |
210 | 0
211 | 0
212 |
213 |
214 |
215 |
216 | Arial
217 | 15
218 | 75
219 | true
220 |
221 |
222 |
223 | 5. CPUs
224 |
225 |
226 |
227 | -
228 |
229 |
230 |
231 | 0
232 | 0
233 |
234 |
235 |
236 | 1
237 |
238 |
239 |
240 |
241 |
242 |
243 |
244 | -
245 |
246 |
247 |
248 | Arial
249 | 15
250 |
251 |
252 |
253 | background-color: rgb(154, 25, 6);
254 |
255 |
256 | Merge
257 |
258 |
259 |
260 | -
261 |
262 |
263 |
264 | 0
265 | 0
266 |
267 |
268 |
269 |
270 | Arial
271 | 24
272 | 75
273 | true
274 |
275 |
276 |
277 | Merge Sequence
278 |
279 |
280 | Qt::AlignCenter
281 |
282 |
283 |
284 |
285 | labelname
286 | label
287 | label_2
288 | left
289 | right
290 | leftbtn
291 | rightbtn
292 | label_3
293 | label_4
294 | output
295 | outputbtn
296 | name
297 | pushButton
298 | left
299 | name
300 | label_5
301 | label_4
302 |
303 |
304 |
305 |
306 |
--------------------------------------------------------------------------------
/CMlib/plot_each_bam_filter.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | import pysam
3 | from pyfasta import Fasta
4 | import matplotlib
5 | from scipy import stats
6 | import re
7 | import matplotlib.pyplot as plt
8 | import pandas as pd
9 | import numpy as np
10 | from PyQt5 import QtWidgets
11 |
12 |
13 |
def caldel(samfilename, start, end, genename, filter):
    """
    Count CIGAR operations per reference position and derive deletion rates.

    :param samfilename: path and name of each bam file
    :param start: setting the start site of deletion calculation
    :param end: setting the end site of deletion calculation
    :param genename: the name of genome-editing target region
    :param filter: number subtracted from the per-position total before the
                   deletion rate is computed
    :return: tuple (mutateinforpd, deletelentpd).
             mutateinforpd: one row per position, one column per CIGAR
             operation code, plus 'sum' (row total) and 'delrate'
             (column 2 / (sum - filter) * 100).
             deletelentpd: number of deletions per deletion length, counted
             for deletions that begin strictly between start and end and are
             shorter than end - start.
    """
    mutateinfor = dict()

    deletelent = dict()

    # The context manager closes the alignment file once all reads are read.
    with pysam.AlignmentFile(samfilename, 'r') as samfile:

        for read in samfile.fetch(genename):

            # Reads without CIGAR information have nothing to count.
            if not read.cigartuples:

                continue

            nowsite = read.reference_start

            for cigartype, cigarlenght in read.cigartuples:

                cigarend = nowsite + cigarlenght

                if cigartype == 2 and start < nowsite < end and cigarlenght < (end - start):

                    deletelent[cigarlenght] = deletelent.get(cigarlenght, 0) + 1

                for i in range(nowsite, cigarend):

                    sitecounts = mutateinfor.setdefault(i, dict())

                    sitecounts[cigartype] = sitecounts.get(cigartype, 0) + 1

                # NOTE(review): the cursor advances for every operation code,
                # including codes that may not consume reference bases --
                # confirm this is intended.
                nowsite += cigarlenght

    mutateinforpd = pd.DataFrame.from_dict(mutateinfor, orient='index').fillna(value=0)

    # Column 2 (deletions) is absent when no read carries a deletion; create
    # it so the rate below is 0 instead of raising KeyError.
    if 2 not in mutateinforpd.columns:

        mutateinforpd[2] = 0

    mutateinforpd['sum'] = mutateinforpd.sum(axis=1)

    mutateinforpd['delrate'] = mutateinforpd[2]/(mutateinforpd['sum']-filter) * 100

    deletelentpd = pd.DataFrame.from_dict(deletelent, orient='index')

    return (mutateinforpd, deletelentpd)
85 |
def barchart_filter(infofile,groupinfo,refname, output, bamdir):
    """
    Draw one deletion-rate bar chart per sample and export the plotted data.

    For every row of the sample table the function writes
    ``<output>/<Note>.pdf`` and, in ``bamdir``, ``<Note>.graph.csv`` (plotted
    window) and ``<Note>.pam.csv`` (PAM positions). Samples missing from the
    group table, or whose Note is neither a gRNA nor a crRNA name, are
    reported with a warning dialog and skipped.

    :param infofile: a description file of details of each sample, example: sample_infor.txt
    :param groupinfo: a description file of details of each group, example: group_infor.txt
    :param refname: a fasta format of the sequence in the target region, exaple:Samples_gene.fa
    :param output: folder of final result (also holds filter_wt_reads_number.txt)
    :param bamdir: folder of temporary files (holds <Note>.bam)
    :return: None
    """
    datainfo = pd.read_csv(infofile, index_col="Index")
    groupinfor = pd.read_csv(groupinfo)
    filterfile = os.path.join(output, 'filter_wt_reads_number.txt')
    filterinfor = pd.read_table(filterfile)

    # Sample name -> value of the 'filter' column.
    filter_dict = dict()
    for idz in filterinfor.index:
        filter_dict[filterinfor.loc[idz]['Sample']] = filterinfor.loc[idz]['filter']

    # Every replicate and the control of a group share the group's strand.
    stranddict = dict()
    for idy in groupinfor.index:
        grouprow = groupinfor.loc[idy]
        for member in (grouprow.rep1, grouprow.rep2, grouprow.rep3, grouprow.control):
            stranddict[member] = grouprow.strand

    fa = Fasta(refname)

    for idx in datainfo.index:
        samplerow = datainfo.loc[idx]
        note = samplerow.Note

        if note not in stranddict:
            error = ' '.join([note, 'is not involved in group table! Please Check!'])
            showwarnings("Error", error)
            continue

        strand = stranddict[note]

        # Plot window around the target; guide_type: g/c = gRNA/crRNA,
        # f/r = '+' strand / any other strand value.
        if re.search("gRNA", note):
            start = samplerow['start'] - 10
            end = samplerow['end'] + 10
            guide_type = 'gf' if strand == '+' else 'gr'
        elif re.search("crRNA", note):
            if strand == '+':
                start = samplerow['start'] - 10
                end = samplerow['end'] + 30
                guide_type = 'cf'
            else:
                start = samplerow['start'] - 30
                end = samplerow['end'] + 10
                guide_type = 'cr'
        else:
            # Without this branch the window would be undefined, or silently
            # reused from the previous sample.
            error = ' '.join([note, 'is neither a gRNA nor a crRNA sample! Please Check!'])
            showwarnings("Error", error)
            continue

        bamfile = os.path.join(bamdir, note + '.bam')
        pdffile = os.path.join(output, note + '.pdf')
        graphcsv = os.path.join(bamdir, note + '.graph.csv')
        pamcsv = os.path.join(bamdir, note + '.pam.csv')

        genename = samplerow.gene_name
        seq = fa[genename][start - 1:end].upper()
        if strand == '-':
            seq = DNA_reverse(DNA_complement(seq))
        seqlist = list(seq)

        filter_read = filter_dict[note]
        (mutateinforpd, deletelentpd) = caldel(samfilename=bamfile, start=start, end=end, genename=genename, filter=filter_read)

        # Work on a copy: columns are added to this frame further down.
        reg = mutateinforpd.loc[start:end].copy()

        # Positional window holding the PAM bases inside the plotted region.
        lenth = end - start
        if guide_type in ('gf', 'gr'):
            pamwindow = slice(-13, -10)
        elif lenth == 65:
            pamwindow = slice(10, 13)
        elif lenth == 66:
            pamwindow = slice(10, 14)
        else:
            pamwindow = None

        if pamwindow is None:
            # Unexpected crRNA window length: plot without PAM highlighting
            # (an empty frame still offers the .index used below).
            seqlistPAM = list()
            regPAM = reg.iloc[0:0].copy()
        else:
            seqlistPAM = seqlist[pamwindow]
            regPAM = reg.iloc[pamwindow].copy()
            # Blank the PAM letters in the minor labels; they are drawn
            # separately in red as major tick labels.
            seqlist[pamwindow] = [''] * (pamwindow.stop - pamwindow.start)
        seqlistother = seqlist

        fig, ax = plt.subplots()
        y = reg.delrate
        if strand == '-':
            y = y[::-1]
        ax.bar(reg.index, y, color='blue')
        ax.set_title(note)
        ax.set_xticks(reg.index, minor=True)
        # minor=True addresses the minor ticks of the axis.
        ax.set_xticklabels(seqlistother, color="black", minor=True, fontdict = {'family': 'Arial'}, size = 5)
        ax.set_xticks(regPAM.index)
        ax.set_xticklabels(seqlistPAM, color="red", fontdict = {'family': 'Arial'}, size = 5)
        plt.savefig(pdffile, dpi=300, format="pdf")
        plt.close(fig)
        print(pdffile, "done!")

        # Export the plotted data as csv into the temporary folder.
        # y is turned into a list first: assigning the Series itself would
        # realign it on the index and lose the plotted orientation.
        ratio_final = list(y)
        reg['label'] = seqlistother  # x-axis labels next to the values
        reg['ratio'] = ratio_final
        reg.to_csv(graphcsv,index=True, index_label="Index")
        regPAM['label'] = seqlistPAM
        regPAM.to_csv(pamcsv,index=True, index_label="Index")
245 |
246 |
247 |
248 |
def DNA_complement(sequence):
    """Return the upper-case complement of *sequence* (A<->T, C<->G).

    Characters other than A, C, G and T are only upper-cased.
    """
    swap = str.maketrans('ATCG', 'TAGC')
    return sequence.upper().translate(swap)
256 |
257 |
def DNA_reverse(sequence):
    """Return *sequence* upper-cased and read back to front."""
    return ''.join(reversed(sequence.upper()))
261 |
262 |
263 | # ############## warning message #########
def showwarnings(title, message):
    """Show a warning message box with an Ok button and run it via exec_().

    :param title: window title of the box
    :param message: text displayed in the box
    """
    dialog = QtWidgets.QMessageBox()
    dialog.setWindowTitle(title)
    dialog.setText(message)
    dialog.setIcon(QtWidgets.QMessageBox.Warning)
    dialog.setStandardButtons(QtWidgets.QMessageBox.Ok)
    dialog.exec_()
271 | ##################################################
--------------------------------------------------------------------------------
/CMlib/plot_pdf_filter.py:
--------------------------------------------------------------------------------
1 | import pysam
2 | from pyfasta import Fasta
3 | import matplotlib
4 | from scipy import stats
5 | from os import path
6 | import matplotlib.pyplot as plt
7 | import pandas as pd
8 | import numpy as np
9 | import re
10 | import os
11 |
12 |
13 |
def caldel(samfilename, start, end, genename, filter):
    """
    Count CIGAR operations per reference position and derive deletion rates.

    :param samfilename: path and name of each bam file
    :param start: setting the start site of deletion calculation
    :param end: setting the end site of deletion calculation
    :param genename: the name of genome-editing target region
    :param filter: number subtracted from the per-position total before the
                   deletion rate is computed
    :return: tuple (mutateinforpd, deletelentpd).
             mutateinforpd: one row per position, one column per CIGAR
             operation code, plus 'sum' (row total) and 'delrate'
             (column 2 / (sum - filter), a fraction).
             deletelentpd: number of deletions per deletion length, counted
             for deletions that begin strictly between start and end and are
             shorter than end - start.
    """
    mutateinfor = dict()

    deletelent = dict()

    # The context manager closes the alignment file once all reads are read.
    with pysam.AlignmentFile(samfilename, 'r') as samfile:

        for read in samfile.fetch(genename):

            # Reads without CIGAR information have nothing to count.
            if not read.cigartuples:

                continue

            nowsite = read.reference_start

            for cigartype, cigarlenght in read.cigartuples:

                cigarend = nowsite + cigarlenght

                if cigartype == 2 and start < nowsite < end and cigarlenght < (end - start):

                    deletelent[cigarlenght] = deletelent.get(cigarlenght, 0) + 1

                for i in range(nowsite, cigarend):

                    sitecounts = mutateinfor.setdefault(i, dict())

                    sitecounts[cigartype] = sitecounts.get(cigartype, 0) + 1

                # NOTE(review): the cursor advances for every operation code,
                # including codes that may not consume reference bases --
                # confirm this is intended.
                nowsite += cigarlenght

    mutateinforpd = pd.DataFrame.from_dict(mutateinfor, orient='index').fillna(value=0)

    # Column 2 (deletions) is absent when no read carries a deletion; create
    # it so the rate below is 0 instead of raising KeyError.
    if 2 not in mutateinforpd.columns:

        mutateinforpd[2] = 0

    mutateinforpd['sum'] = mutateinforpd.sum(axis=1)

    mutateinforpd['delrate'] = mutateinforpd[2]/(mutateinforpd['sum'] - filter)

    deletelentpd = pd.DataFrame.from_dict(deletelent, orient='index')

    return (mutateinforpd, deletelentpd)
84 |
def plotpdf_filter(groupinfo, refname, output, bamdir):

    """
    Draw, per group, the mean deletion rate of rep1/rep2 next to the control.

    A group is plotted only when the bam files of rep1 and rep2 both exist in
    ``bamdir``; the result is saved as ``<output>/<group>.pdf``. When the
    control bam is missing the right-hand panel shows zeros. Groups whose
    name is neither a gRNA nor a crRNA name are reported and skipped.

    :param groupinfo: a description file of details of each group, example: group_infor.txt
    :param refname: a fasta format of the sequence in the target region, exaple:Samples_gene.fa
    :param output: folder of final result (also holds filter_wt_reads_number.txt)
    :param bamdir: folder of temporary files (holds the <sample>.bam files)
    :return: None
    """

    groupinfor = pd.read_csv(groupinfo)
    # Empty cells become the text "UNKNOWN" so that the name handling below
    # still works on them.
    groupinfor = groupinfor.fillna("UNKNOWN")
    filterfile = os.path.join(output, 'filter_wt_reads_number.txt')
    filterinfor = pd.read_table(filterfile)

    # Sample name -> value of the 'filter' column.
    filter_dict = dict()
    for idz in filterinfor.index:
        filter_dict[filterinfor.loc[idz]['Sample']] = filterinfor.loc[idz]['filter']

    fa = Fasta(refname)

    for idx in groupinfor.index:

        grouprow = groupinfor.loc[idx]

        repbam1 = os.path.join(bamdir, grouprow['rep1'] + '.bam')
        repbam2 = os.path.join(bamdir, grouprow['rep2'] + '.bam')
        ckbam = os.path.join(bamdir, grouprow['control'] + '.bam')

        strand = grouprow['strand']
        genename = grouprow['gene']
        namenow = grouprow['group']

        # Plot window around the target; guide_type: g/c = gRNA/crRNA,
        # f/r = '+' strand / any other strand value.
        guide_type = None
        if re.search("gRNA", namenow):
            start = grouprow['start'] - 10
            end = grouprow['end'] + 10
            guide_type = 'gf' if strand == '+' else 'gr'
        elif re.search("crRNA", namenow):
            if strand == '+':
                start = grouprow['start'] - 10
                end = grouprow['end'] + 30
                guide_type = 'cf'
            else:
                start = grouprow['start'] - 30
                end = grouprow['end'] + 10
                guide_type = 'cr'

        # Both replicates are required; the control is optional.
        if not (path.exists(repbam1) and path.exists(repbam2)):
            continue

        if guide_type is None:
            # Without this check the window would be undefined, or silently
            # reused from the previous group.
            print("group", namenow, "skipped: name is neither gRNA nor crRNA")
            continue

        seq = fa[genename][start - 1:end].upper()
        if strand == '-':
            seq = DNA_reverse(DNA_complement(seq))
        seqlist = list(seq)

        filter_read1 = filter_dict[grouprow['rep1']]
        filter_read2 = filter_dict[grouprow['rep2']]

        (mutateinforpd1, deletelentpd1) = caldel(samfilename=repbam1, start=start, end=end, genename=genename, filter=filter_read1)
        (mutateinforpd2, deletelentpd2) = caldel(samfilename=repbam2, start=start, end=end, genename=genename, filter=filter_read2)

        rep1 = mutateinforpd1.loc[start:end].delrate
        rep2 = mutateinforpd2.loc[start:end].delrate
        reg = pd.concat([rep1, rep2], axis=1)

        regmean = reg.mean(axis=1)
        stdrr = reg.sem(axis=1)

        # Positional window holding the PAM bases inside the plotted region.
        lenth = end - start
        if guide_type in ('gf', 'gr'):
            pamwindow = slice(-13, -10)
        elif lenth == 65:
            pamwindow = slice(10, 13)
        elif lenth == 66:
            pamwindow = slice(10, 14)
        else:
            pamwindow = None

        if pamwindow is None:
            # Unexpected crRNA window length: plot without PAM highlighting
            # (an empty Series still offers the .index used below).
            seqlistPAM = list()
            regPAM = regmean.iloc[0:0]
        else:
            seqlistPAM = seqlist[pamwindow]
            regPAM = regmean.iloc[pamwindow]
            # Blank the PAM letters in the minor labels of both panels; they
            # are drawn separately in red as major tick labels.
            seqlist[pamwindow] = [''] * (pamwindow.stop - pamwindow.start)
        seqlistother = seqlist

        pdfname = os.path.join(output, namenow + '.pdf')

        fig, (ax0, ax1) = plt.subplots(ncols=2, sharey=True, figsize=(16, 9))

        y = regmean
        y_std = stdrr
        if strand == '-':
            y = y[::-1]
            y_std = y_std[::-1]

        ax0.bar(regmean.index, y, color='purple')
        # Error bars: elinewidth = bar line width; capsize/capthick = length
        # and thickness of the end caps; ls='None' removes the connecting
        # line; ecolor = error bar colour.
        ax0.errorbar(regmean.index, y, yerr=y_std, fmt='', elinewidth=0.5, capsize=2, capthick=0.5, ls='None',
                     ecolor='black')
        ax0.set_title(namenow)

        ax0.set_xticks(regmean.index, minor=True)
        # minor=True addresses the minor ticks of the axis.
        ax0.set_xticklabels(seqlistother, color="black", minor=True)
        ax0.set_xticks(regPAM.index)
        ax0.set_xticklabels(seqlistPAM, color="red")

        if path.exists(ckbam):
            filter_CK = filter_dict[grouprow['control']]
            (mutateinforpdCK, deletelentpdCK) = caldel(samfilename=ckbam, start=start, end=end, genename=genename, filter=filter_CK)
            ckname = namenow + ' Control'
            regck = mutateinforpdCK.loc[start:end]

            y_ck = regck.delrate
            if strand == '-':
                y_ck = y_ck[::-1]
        else:
            # No control available: draw an all-zero panel on the same axis.
            ckname = namenow + ' Contron_Unknown'
            regck = regmean
            y_ck = regmean - regmean

        ax1.bar(regck.index, y_ck, color='grey')
        ax1.set_title(ckname)
        ax1.set_xticks(regck.index,minor=True)
        ax1.set_xticklabels(seqlistother,minor=True)
        ax1.set_xticks(regPAM.index)
        ax1.set_xticklabels(seqlistPAM, color="red")

        plt.savefig(pdfname)
        plt.close(fig)
        print("group",namenow, "finished!")
265 |
def DNA_complement(sequence):
    """Return the base-wise complement of *sequence*, in upper case.

    A<->T and C<->G are swapped; every other character is kept (upper-cased).
    The order of the bases is not changed.
    """
    complement_table = str.maketrans('ATCG', 'TAGC')
    return sequence.upper().translate(complement_table)
273 |
274 |
def DNA_reverse(sequence):
    """Return *sequence* upper-cased and read back to front (bases are not complemented)."""
    upper_seq = sequence.upper()
    return ''.join(reversed(upper_seq))
--------------------------------------------------------------------------------
/CMlib/output_aln_fa_filter.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pysam
3 | from pyfasta import Fasta
4 | import matplotlib
5 | from scipy import stats
6 | import matplotlib.pyplot as plt
7 | import pandas as pd
8 | import numpy as np
9 | import re
10 | from glob import glob
11 | from CMlib.showprocess import showbarprocess
12 | from PyQt5 import QtWidgets
13 |
def _target_window(note, strand, start, end):
    """Return the ``(start, end)`` reporting window for one sample.

    gRNA samples get 10 extra bases on both sides; crRNA samples get 10
    bases on one side and 30 on the other, the longer side depending on
    *strand*.  Returns ``None`` when *note* names neither sample type.
    """
    if re.search("gRNA", note):
        return start - 10, end + 10
    if re.search("crRNA", note):
        if strand == '+':
            return start - 10, end + 30
        return start - 30, end + 10
    return None


def _write_alignment_types(counts, fasta_out, table_out):
    """Write each aligned sequence and its read count as a FASTA record and as a tab-separated row."""
    for alnseq, count in counts.items():
        print('>', count, sep='', file=fasta_out)
        print(alnseq, file=fasta_out)
        print(count, '\t'.join(alnseq), sep='\t', file=table_out)


def alnfile_filter(infofile, groupinfo, refname, output, bamdir):
    """
    Write, for every sample, the distinct read alignments seen in its target window.

    For each sample four files are written into *output*:
    ``<Note>_del_aln.fa`` / ``<Note>_del_aln.txt`` and
    ``<Note>_snp_aln.fa`` / ``<Note>_snp_aln.txt``.  Reads are classified as

    * substitution reads (a mismatch inside the window and no deletion) -> snp files,
    * deletion reads -> del files,
    * all remaining reads -> both sets of files.

    Reads without a mismatch inside the window are dropped when they carry a
    mismatch outside the window or have no aligned base outside it.  Only
    reads whose collected alignment spans the whole window are reported.

    Samples missing from the group table, or whose Note is neither a gRNA nor
    a crRNA sample, are reported with a warning dialog and skipped.

    :param infofile: a description file of details of each sample, example: sample_infor.txt
    :param groupinfo: a description file of details of each group, example: group_infor.txt
    :param refname: a fasta format of the sequence in the target region, example: Samples_gene.fa
    :param output: folder of final result
    :param bamdir: folder of temporary files (holds ``<Note>.bam``)
    :return: None
    """
    fa = Fasta(refname)
    info = pd.read_csv(infofile, index_col="Index")
    groupinfor = pd.read_csv(groupinfo)

    # Every replicate and the control of a group share the strand of that group.
    stranddict = dict()
    for idy in groupinfor.index:
        grouprow = groupinfor.loc[idy]
        for member in (grouprow.rep1, grouprow.rep2, grouprow.rep3, grouprow.control):
            stranddict[member] = grouprow.strand

    for idx in info.index:
        samplerow = info.loc[idx]
        note = samplerow.Note
        if note not in stranddict:
            error = ' '.join([note, 'is not involved in group table! Please Check!'])
            showwarnings("Error", error)
            continue

        print("output", note)
        showbarprocess("output " + note)

        strand = stranddict[note]
        window = _target_window(note, strand, samplerow['start'], samplerow['end'])
        if window is None:
            # Previously this case hit an undefined (or stale) start/end.
            error = ' '.join([note, 'is neither a gRNA nor a crRNA sample! Please Check!'])
            showwarnings("Error", error)
            continue
        start, end = window

        bamname = os.path.join(bamdir, note + '.bam')
        outfile_del = os.path.join(output, note + '_del_aln.fa')
        outfile_snp = os.path.join(output, note + '_snp_aln.fa')
        alnfile_del = os.path.join(output, note + '_del_aln.txt')
        alnfile_snp = os.path.join(output, note + '_snp_aln.txt')

        gene = samplerow.gene_name
        seqlist = list(fa[gene][start - 1:end].upper())  ## reference sequence of the window

        replace = set()       # reads with a mismatch inside the window
        all_tmp = set()       # reads with an aligned base inside the window
        deletion = set()      # reads with a deletion following a window base
        replace_side = set()  # reads with an aligned base outside the window
        replace_left = set()  # reads with a mismatch outside the window
        reads = dict()        # read name -> alignment string collected over the window

        # The context managers close all five handles even if processing fails.
        with open(outfile_del, 'w') as outfa_del, \
                open(outfile_snp, 'w') as outfa_snp, \
                open(alnfile_del, 'w') as outlan_del, \
                open(alnfile_snp, 'w') as outlan_snp, \
                pysam.AlignmentFile(bamname, "rb") as samfile:

            # One pass over the pileup serves both the window and the flank
            # bookkeeping; the two use disjoint containers.
            for pileupcolumn in samfile.pileup(gene, max_depth=50000):
                pos = pileupcolumn.pos  # 0-based reference position
                in_window = end > pos >= start - 1
                # NOTE(review): as written, position start-1 counts as both
                # window and flank, and position `end` as neither - confirm
                # the intended boundaries before changing them.
                in_flank = start > pos >= 0 or pos > end
                if not (in_window or in_flank):
                    continue

                refbase = fa[gene][pos].upper()
                for pileupread in pileupcolumn.pileups:
                    name = pileupread.alignment.query_name
                    if in_window and name not in reads:
                        reads[name] = ''
                    if pileupread.is_del or pileupread.is_refskip:
                        continue

                    querybase = pileupread.alignment.query_sequence[pileupread.query_position]
                    if in_window:
                        all_tmp.add(name)
                        if querybase != refbase:
                            replace.add(name)
                        reads[name] += querybase
                        if pileupread.indel < 0:
                            # A deletion follows this base: pad with '-' for its length.
                            reads[name] += '-' * abs(pileupread.indel)
                            deletion.add(name)
                    if in_flank:
                        replace_side.add(name)
                        if querybase != refbase:
                            replace_left.add(name)

            wt_set = all_tmp - replace                 # no mismatch inside the window
            wt_side_set = replace_side - replace_left  # aligned outside the window, no mismatch there
            filter_set = wt_set - wt_side_set          # reads to discard
            replace_final = replace - deletion         # substitution-only reads

            lt = end - start + 1
            typdict = dict()
            typdict_snp = dict()
            typdict_del = dict()
            for name, alnseq in reads.items():
                # Keep only unfiltered reads that span the whole window.
                if name in filter_set or len(alnseq) != lt:
                    continue
                if name in replace_final:
                    bucket = typdict_snp
                elif name in deletion:
                    bucket = typdict_del
                else:
                    bucket = typdict
                bucket[alnseq] = bucket.get(alnseq, 0) + 1

            # Unclassified reads go to both outputs, then the specific types.
            _write_alignment_types(typdict, outfa_snp, outlan_snp)
            _write_alignment_types(typdict, outfa_del, outlan_del)
            _write_alignment_types(typdict_snp, outfa_snp, outlan_snp)
            _write_alignment_types(typdict_del, outfa_del, outlan_del)
            print("Refseq", '\t'.join(seqlist), sep='\t', file=outlan_snp)
            print("Refseq", '\t'.join(seqlist), sep='\t', file=outlan_del)
258 |
259 | # ############## warning message #########
def showwarnings(title, message):
    """Show a modal warning dialog with an OK button and wait until it is dismissed.

    :param title: window title of the dialog
    :param message: text displayed in the dialog
    """
    dialog = QtWidgets.QMessageBox()
    dialog.setIcon(QtWidgets.QMessageBox.Warning)
    dialog.setWindowTitle(title)
    dialog.setText(message)
    dialog.setStandardButtons(QtWidgets.QMessageBox.Ok)
    dialog.exec_()
267 | ##################################################
--------------------------------------------------------------------------------
/CMlib/Barplot_deletion_filter.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pysam
3 | from pyfasta import Fasta
4 | import matplotlib
5 | from scipy import stats
6 | import re
7 | import matplotlib.pyplot as plt
8 | import pandas as pd
9 | import numpy as np
10 | from os import path
11 | from matplotlib.ticker import MultipleLocator, FormatStrFormatter
12 |
13 |
def deletion_len(samfilename, genename, output):
    """Tabulate deletion sizes found in ``<output>/<samfilename>_del_aln.fa``.

    The file is read as FASTA-like records: a ``>count`` header line followed
    by an aligned sequence in which every run of ``-`` is one deletion.  Each
    run adds the record's count to the bucket of its length (1..20), or to
    the ``'>21'`` bucket when the run is longer than 20.

    :param samfilename: sample name (prefix of the ``_del_aln.fa`` file)
    :param genename: unused; kept for interface compatibility
    :param output: folder that contains the ``_del_aln.fa`` file
    :return: ``(counts, percents)`` - two DataFrames indexed by bucket with
             column ``0`` holding the counts and column ``'ratio'`` holding
             the share of all deletions, as a fraction in the first frame
             and as a percentage in the second
    :raises ValueError: if a sequence with a deletion precedes any header line
    """
    outfile_del = os.path.join(output, samfilename + '_del_aln.fa')

    lenth = {size: 0 for size in range(1, 21)}
    lenth['>21'] = 0

    count_tmp = None
    with open(outfile_del, 'r') as fw:
        for line in fw:
            a = line.strip()
            if a.startswith('>'):
                count_tmp = a.split('>')[1]
                continue
            for gap in re.findall(r"-+", a):
                if count_tmp is None:
                    raise ValueError(
                        "sequence line found before any '>count' header in " + outfile_del)
                size = len(gap)
                if size > 20:
                    lenth['>21'] += int(count_tmp)
                else:
                    lenth[size] += int(count_tmp)

    deletelentpd = pd.DataFrame.from_dict(lenth, orient='index')
    deletion_sum = deletelentpd.iloc[:, 0].sum()  ## total over all deletion buckets
    deletelentpd_p = deletelentpd.copy()

    deletelentpd['ratio'] = deletelentpd[0] / deletion_sum
    deletelentpd_p['ratio'] = deletelentpd_p[0] / deletion_sum * 100
    return (deletelentpd, deletelentpd_p)
49 |
def deletionbarplot(regmean, stdrr, namenow, deletelentpdall, pdfname):
    """Save a bar chart of the mean deletion-size distribution with error bars.

    :param regmean: bar heights, one per x position 1..21
    :param stdrr: error-bar half lengths matching *regmean*
    :param namenow: group name, used as plot title and in the progress message
    :param deletelentpdall: table whose index supplies the x tick labels
    :param pdfname: path of the figure file to write
    """
    positions = np.array(range(1, 22))
    fig, ax = plt.subplots()

    # One decimal place on the y axis.
    ax.yaxis.set_major_formatter(FormatStrFormatter('%1.1f'))

    ax.bar(positions, regmean, color='red')
    # Error bars only: thin black lines with small caps and no connecting line.
    ax.errorbar(
        positions,
        regmean,
        yerr=stdrr,
        fmt='',
        elinewidth=0.5,
        capsize=2,
        capthick=0.5,
        ls='None',
        ecolor='black',
    )

    ax.set_title(namenow, size=15, fontdict={'family': 'Times New Roman'})
    ax.set_ylabel('Deletion Size (%)', size=15, fontdict={'family': 'Times New Roman'})
    ax.set_xticks(positions)
    ax.set_xticklabels(deletelentpdall.index, rotation=35, fontdict={'family': 'Arial'}, size=5)

    plt.savefig(pdfname)
    plt.close(fig)
    print("group", namenow, "deletion size finished!")
66 |
def barchart_filter(groupinfo, output, bamdir):
    """Write the deletion-size table and bar chart of every group.

    For each row of the group table the replicates (``rep1``..``rep3``) whose
    ``<name>.bam`` exists in *bamdir* are used.  Their deletion-size tables
    are read with ``deletion_len``, combined into
    ``<group>_deletion_size.csv`` and plotted to ``<group>_deletion_size.pdf``
    in *output*.  A group without any replicate BAM is skipped.

    The CSV columns are always emitted in replicate order (rep1, rep2, rep3)
    so that every ``repN_count`` / ``repN_Ratio`` header sits above the data
    of that replicate.

    :param groupinfo: CSV file describing the groups
    :param output: folder holding the ``_del_aln.fa`` inputs and receiving the results
    :param bamdir: folder that contains the per-sample BAM files
    :return: None
    """
    replicates = ('rep1', 'rep2', 'rep3')
    # Console notice for each incomplete combination of available replicates.
    missing_notice = {
        ('rep1', 'rep2'): "Rep 3 is missing",
        ('rep1', 'rep3'): "Rep 2 is missing",
        ('rep2', 'rep3'): "Rep 1 is missing",
        ('rep1',): "Rep2 and Rep3 are missing",
        ('rep2',): "Rep1 and Rep3 are missing",
        ('rep3',): "Rep1 and Rep2 are missing",
    }

    groupinfor = pd.read_csv(groupinfo)
    groupinfor = groupinfor.fillna("UNKNOWN")  ## fill empty cells so names stay strings

    for idx in groupinfor.index:
        row = groupinfor.loc[idx]
        genename = row['gene']
        namenow = row['group']
        pdfname = os.path.join(output, namenow + '_deletion_size.pdf')
        csvname = os.path.join(output, namenow + '_deletion_size.csv')

        present = [rep for rep in replicates
                   if path.exists(os.path.join(bamdir, row[rep] + '.bam'))]
        if not present:
            print("group", namenow, "has no replicate BAM file, skipped")
            continue
        notice = missing_notice.get(tuple(present))
        if notice is not None:
            print(notice)

        count_tables = []
        percent_columns = []
        for rep in present:
            counts, percents = deletion_len(samfilename=row[rep], genename=genename, output=output)
            count_tables.append(counts)
            percent_columns.append(percents.iloc[:, 1])  ## the percentage column

        reg = pd.concat(percent_columns, axis=1)
        regmean = reg.mean(axis=1)
        stdrr = reg.sem(axis=1)

        len_raw_data = pd.concat(count_tables, axis=1)
        len_raw_data['ratio_mean'] = regmean
        len_raw_data['ratio_stdrr'] = stdrr

        header = []
        for rep in present:
            header += [rep + '_count', rep + '_Ratio']
        header += ['ratio_mean', 'ratio_stdrr']
        len_raw_data.to_csv(csvname, index=True, index_label='Index', header=header,
                            sep=',', encoding='utf-8')
        print("Output", csvname, "finished!")

        ## plot figures; the first table only supplies the x tick labels
        deletionbarplot(regmean, stdrr, namenow, count_tables[0], pdfname)
260 |
261 |
262 |
--------------------------------------------------------------------------------
/start.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import pandas as pd
3 | import matplotlib.pyplot as plt
4 |
5 | from PyQt5 import uic,QtWidgets
6 | from PyQt5.QtGui import QStandardItemModel, QStandardItem
7 | from PyQt5.QtCore import Qt
8 | from PyQt5.QtWidgets import QTableWidgetItem, QDialog, QHeaderView, QProgressDialog
9 | import os
10 |
11 | from CMlib.show_sampletable import showtable
12 | from CMlib.show_grouptable import showtable as showgrouptable
13 | from CMlib.show_result import showtable as showresult
14 | from CMlib.show_fasta import showfasta
15 | #from crisprmatch_running import showtable as crisprmatchrun
16 | from crisprmatch_running import mainprogram
17 |
18 | from subprocess import Popen
19 | from subprocess import PIPE
20 | # from CRISPRMatch import main as startrunning
21 |
# Working directory at start-up; also passed as the initial directory of the
# file and directory choosers opened by MyApp.
path = os.getcwd()
# NOTE(review): the .ui file is looked up relative to the working directory,
# so this presumably only works when launched from the repository root - confirm.
qtCreatorFile = os.path.join(path,'CMlib/start.ui') # name of the Qt Designer .ui file goes here

# Load the form class and its Qt base class from the .ui file at import time.
Ui_MainWindow, QtBaseClass = uic.loadUiType(qtCreatorFile)
26 |
27 |
28 |
29 |
30 | class MyApp(QtWidgets.QMainWindow, Ui_MainWindow):
def __init__(self):
    """Build the start page, wire its buttons and reset the session state."""
    QtWidgets.QMainWindow.__init__(self)
    Ui_MainWindow.__init__(self)
    self.setupUi(self)
    self.setWindowTitle('CRISPRMatch Start Page')

    # Button -> handler wiring.
    connections = (
        (self.load1btn, self.getsampleCSV),
        (self.load2btn, self.getgenefa),
        (self.load3btn, self.getgroupCSV),
        (self.Show1btn, self.showsampletable),
        (self.Show2btn, self.showgenefa),
        (self.Show3btn, self.showgrouptable),
        (self.tbnin, self.inputdir),
        (self.tbnout, self.outputdir),
        (self.startButton, self.startrun),
        (self.resultButton, self.showresults),
    )
    for button, handler in connections:
        button.clicked.connect(handler)

    # Text shown in the step-information box.
    self.prosstext = str()

    # Loaded file paths: sample table, gene fasta, group table.
    self.path1 = ""
    self.path2 = ""
    self.path3 = ""
    # Set to the matching path once the file has been displayed and accepted.
    self.path1check = ""
    self.path2check = ""
    self.path3check = ""

    self.inputdirpath = ""
    self.outputdirpath = ""
    self.resultcheck = ""  # "done" once a run has finished
    self.step = ""
66 |
67 |
68 |
69 |
70 | ##############get sampleCSV and show table#########
def getsampleCSV(self):
    """Ask for the sample information CSV and load it into ``self.dfsample``.

    Nothing changes when the file dialog is cancelled.
    """
    chosen, _ = QtWidgets.QFileDialog.getOpenFileName(self, 'Open file', path)
    if chosen == "":
        return

    print("Direction", chosen)  # optional: echo the chosen file path
    self.dfsample = pd.read_csv(str(chosen))
    self.prosstext += ' '.join(['load sample information table:', chosen, ';\n'])
    self.processinfo.setText(self.prosstext)
    self.path1 = chosen
80 |
def showsampletable(self):
    """Display the loaded sample table and record whether it was accepted.

    Warns when no sample table has been loaded yet.
    """
    if self.path1 == "":
        self.showMessageBox('Warning', 'Please load Sample information Table first')
        self.path1 = ""
        return

    self.ui = showtable()  ## open the sample-table window
    accepted = self.ui.setuptable(self.dfsample)  ## hand over the loaded sample CSV
    self.prosstext += ' '.join(['check sample table', ';\n'])
    self.processinfo.setText(self.prosstext)

    if accepted == "yes":
        self.ui.show()  ## show the window
        self.path1check = self.path1
    else:
        self.path1check = ""
96 |
97 |
98 |
99 |
100 |
101 | # ##################################################
102 |
103 | # ##############get genefa and show fasta#########
def getgenefa(self):
    """Ask for the gene fasta file and load its lines into ``self.fafile``.

    Nothing changes when the file dialog is cancelled.
    """
    filePath2, _ = QtWidgets.QFileDialog.getOpenFileName(self, 'Open file', path)
    if filePath2 != "":
        print("Fasta Direction", filePath2)
        # Read inside a context manager so the file handle is always closed.
        with open(filePath2, 'r') as fasta_handle:
            self.fafile = fasta_handle.readlines()
        tmp = ' '.join(['load gene file:', filePath2, ';\n'])
        self.prosstext += tmp
        self.processinfo.setText(self.prosstext)
        self.path2 = filePath2
114 |
def showgenefa(self):
    """Display the loaded gene fasta and record whether it was accepted.

    Warns when no fasta file has been loaded yet.
    """
    if self.path2 == "":
        self.showMessageBox('Warning', 'Please load Gene fasta first')
        self.path2 = ""
        return

    self.ui = showfasta()  ## open the fasta window
    accepted = self.ui.setuptext(self.fafile)  ## hand over the loaded fasta lines
    self.prosstext += ' '.join(['check gene sequence', ';\n'])
    self.processinfo.setText(self.prosstext)

    if accepted == "yes":
        self.ui.show()  ## show the window
        self.path2check = self.path2
    else:
        self.path2check = ""
130 | # ##################################################
131 |
132 | # ##############get groupCSV and show table#########
def getgroupCSV(self):
    """Ask for the group information CSV and load it into ``self.dfgroup``.

    Leading and trailing whitespace is removed from every string cell.
    Nothing changes when the file dialog is cancelled.
    """
    filePath3, _ = QtWidgets.QFileDialog.getOpenFileName(self, 'Open file', path)
    if filePath3 == "":
        return

    print("Direction", filePath3)  # optional: echo the chosen file path
    dfgroup = pd.read_csv(str(filePath3))

    def stripstr(x):
        ## strip surrounding whitespace, leave non-string cells untouched
        return x.strip() if isinstance(x, str) else x

    # DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and is
    # deprecated since; use whichever this pandas version provides.
    if hasattr(pd.DataFrame, "map"):
        self.dfgroup = dfgroup.map(stripstr)
    else:
        self.dfgroup = dfgroup.applymap(stripstr)

    # The log line used to say "load gene file" for the group table.
    tmp = ' '.join(['load group information table:', filePath3, ';\n'])
    self.prosstext += tmp
    self.processinfo.setText(self.prosstext)
    self.path3 = filePath3
148 |
def showgrouptable(self):
    """Display the loaded group table and record whether it was accepted.

    Warns when no group table has been loaded yet.
    """
    if self.path3 == "":
        self.showMessageBox('Warning', 'Please load Group information Table first')
        self.path3 = ""
        return

    self.ui = showgrouptable()  ## open the group-table window
    accepted = self.ui.setuptable(self.dfgroup)  ## hand over the loaded group CSV
    self.prosstext += ' '.join(['check group table', ';\n'])
    self.processinfo.setText(self.prosstext)

    ### format check result reported by the table window
    if accepted == "yes":
        self.ui.show()  ## show the window
        self.path3check = self.path3
    else:
        self.path3check = ""
165 | ##################################################
166 |
167 | # ##############set input and output file directory#########
def inputdir(self):
    """Ask for the input directory and remember it in ``self.inputdirpath``.

    Nothing changes when the directory dialog is cancelled.
    """
    chosen = QtWidgets.QFileDialog.getExistingDirectory(self,'open directory',path)
    if chosen == "":
        return

    print("Direction", chosen)
    self.inputdirpath = chosen
    self.lineEdit_input.setText(chosen)

    self.prosstext += ' '.join(['input directory:', chosen, ';\n'])
    self.processinfo.setText(self.prosstext)
178 |
def outputdir(self):
    """Ask for the output directory and remember it in ``self.outputdirpath``.

    Nothing changes when the directory dialog is cancelled.
    """
    chosen = QtWidgets.QFileDialog.getExistingDirectory(self, 'open directory', path)
    if chosen == "":
        return

    print("Direction", chosen)
    self.outputdirpath = chosen
    self.lineEdit_output.setText(chosen)

    self.prosstext += ' '.join(['output directory:', chosen, ';\n'])
    self.processinfo.setText(self.prosstext)
189 | ##################################################
190 |
191 | # ############## warning message #########
def showMessageBox(self, title, message):
    """Show a modal warning dialog with an OK button and wait until it is dismissed.

    :param title: window title of the dialog
    :param message: text displayed in the dialog
    """
    warning_box = QtWidgets.QMessageBox()
    warning_box.setIcon(QtWidgets.QMessageBox.Warning)
    warning_box.setWindowTitle(title)
    warning_box.setText(message)
    warning_box.setStandardButtons(QtWidgets.QMessageBox.Ok)
    warning_box.exec_()
199 | ##################################################
200 |
201 | # ############## start running ###################
def startrun(self):
    """Run the analysis once all inputs are loaded, checked and the folders chosen.

    A warning is shown, and ``self.resultcheck`` is cleared, when any of the
    three input files is not loaded, has not been displayed and accepted via
    its "show" button, or when the input or output directory is not set.
    After a successful run a "Project Done!" dialog is shown and the result
    window is opened.
    """
    if self.path1 == "" or self.path2 == "" or self.path3 == "":
        self.showMessageBox('Warning', 'Please load information first')
        self.resultcheck = ""
        return
    if self.path1check == "" or self.path2check == "" or self.path3check == "":
        self.showMessageBox('Warning', 'Please click show buttons for information checking')
        self.resultcheck = ""
        return
    # Without this check an unset output directory would turn into the
    # root-level paths '/tmpfiles' and '/result'.
    if self.inputdirpath == "" or self.outputdirpath == "":
        self.showMessageBox('Warning', 'Please set input and output directories first')
        self.resultcheck = ""
        return

    sample = self.path1
    gene = self.path2
    group = self.path3
    inputdir = self.inputdirpath
    self.output_tmp = self.outputdirpath + '/' + 'tmpfiles'
    self.output_result = self.outputdirpath + '/' + 'result'

    mainprogram(sample, gene, group, inputdir, self.output_tmp, self.output_result)

    msgBox = QtWidgets.QMessageBox()
    msgBox.setWindowTitle("Information")
    msgBox.setIcon(QtWidgets.QMessageBox.Information)
    msgBox.setText("Project Done!")
    msgBox.setDetailedText("The project has finished, please click ok to show result!")
    msgBox.setStandardButtons(QtWidgets.QMessageBox.Open)
    msgBox.exec_()

    self.resultcheck = "done"
    self.showresults()
241 |
242 | ##################################################
243 |
244 | # ############## show results ###################
def showresults(self):
    """Open the result window once a sample table is loaded and a run has finished.

    Requires ``self.path1`` to be non-empty and ``self.resultcheck`` to be
    ``"done"``; otherwise a warning dialog is shown and ``self.path1`` is
    reset to the empty string.
    """
    ready = self.path1 != "" and self.resultcheck == "done"
    if not ready:
        self.showMessageBox('Warning', 'Please load Sample information Table first')
        self.path1 = ""
        return

    # Open the result window and hand it the loaded tables and output folders.
    self.ui = showresult()
    self.ui.setuptable(self.dfsample, self.output_tmp, self.output_result, self.path2, self.dfgroup)

    # Append a line to the on-screen process log.
    log_line = ' '.join(['check sample table', ';\n'])
    self.prosstext += log_line
    self.processinfo.setText(self.prosstext)

    # Display the window.
    self.ui.show()
256 |
257 | # crispr = path + '/CRISPRMatch.py'
258 | # cmd = ' '.join(['python',crispr, '-g', gene, '-i', sample, '-gi', group, '-s', output_tmp, '-r', output_result])
259 | # print(cmd)
260 | # cmd_run = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
261 | # cmd_run.communicate()
262 | ##################################################
263 |
264 | # ############## show bar ###################
def showbarprocess(self):
    """Show a window-modal progress dialog and sweep it from 0 to its maximum.

    Does nothing when ``self.step`` equals ``"done"``. If the dialog reports
    a cancel while counting, a warning box is shown and counting stops;
    otherwise the dialog is set to its final value.
    """
    if self.step == "done":
        return

    total = int(100000)
    dialog = QProgressDialog(self)
    dialog.setWindowTitle("请稍等")
    dialog.setLabelText("正在操作...")
    dialog.setMinimumDuration(5)
    dialog.setWindowModality(Qt.WindowModal)
    dialog.setRange(0, total)

    cancelled = False
    for tick in range(total):
        dialog.setValue(tick)
        if dialog.wasCanceled():
            QtWidgets.QMessageBox.warning(self, "提示", "操作失败")
            cancelled = True
            break

    # Only reach the final value when the sweep ran to completion.
    if not cancelled:
        dialog.setValue(total)
286 |
287 |
288 |
289 | # self.boton2.clicked.connect(self.plot)
290 | # self.boton3.clicked.connect(self.showCSV)
291 | # self.boton4.clicked.connect(self.show_table)
292 | #
293 | # self.tableWidget.setAlternatingRowColors(True) # 隔行改变颜色
294 | # rown=self.tableWidget.rowCount() # 返回表格的行数
295 | # coln=self.tableWidget.columnCount() # 返回表格的列数
296 | # print(rown)
297 | #
298 | # #self.tableWidget.setHorizontalHeaderLabels('abcdef') # 设置表格表头数据
299 | # # self.tableWidget.setColumnCount(5) # 设置表格的列数
300 | # # self.tableWidget.setRowCount(3) # 设置表格的行数
301 | # # self.tableWidget.horizontalHeader().setSectionResizeMode(QtWidgets.QHeaderView.ResizeToContents) # 表格设置成大小随内容改变
302 | # # self.tableWidget.setEditTriggers(QtWidgets.QAbstractItemView.NoEditTriggers)
303 | # self.tableWidget.setItem(3, 3, QTableWidgetItem("insert3,3")) # 设置表格内容为字符串"content"
304 | # self.timeEdit = QtWidgets.QTimeEdit() # 创建一个timeEdit
305 | # self.tableWidget.setCellWidget(0, 0, self.timeEdit) # 把timeedit添加进tableWidget内
306 | # self.spinBox = QtWidgets.QSpinBox()
307 | # self.spinBox.setValue(10)
308 | # self.tableWidget.setCellWidget(2, 1, self.spinBox)
309 | #
310 | # ###set tableView
311 | # self.model = QStandardItemModel(4,4)
312 | # self.model.setHorizontalHeaderLabels(['标题1', '标题2', '标题3', '标题4'])
313 | # #下面代码让表格100填满窗口
314 | # self.tableView.horizontalHeader().setStretchLastSection(True)
315 | # self.tableView.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)
316 | # for row in range(4):
317 | # for column in range(4):
318 | # item = QStandardItem("row %s, column %s" % (row, column))
319 | # self.model.setItem(row, column, item)
320 | # self.tableView.setModel(self.model)
321 | # self.model.appendRow([
322 | # QStandardItem("row %s, column %s" % (11, 11)),
323 | # QStandardItem("row %s, column %s" % (11, 11)),
324 | # QStandardItem("row %s, column %s" % (11, 11)),
325 | # QStandardItem("row %s, column %s" % (11, 11)),
326 | # ])
327 | # # 取当前选中的所有行
328 | # index = self.tableView.currentIndex()
329 | # print(index.row())
330 | # self.model.removeRow(index.row())
331 | # ###########
332 | #
333 | #
334 | #
335 | #
336 | # # Aquí van las nuevas funciones
337 | # # Esta función abre el archivo CSV
338 | # def getCSV(self):
339 | # filePath, _ = QtWidgets.QFileDialog.getOpenFileName(self, 'Open file', '/Users/qyou/GitLab/pyQttest')
340 | # if filePath != "":
341 | # print("Dirección", filePath) # Opcional imprimir la dirección del archivo
342 | # self.df = pd.read_csv(str(filePath))
343 | #
344 | # def plot(self):
345 | # x=self.df['col1']
346 | # y=self.df['col2']
347 | # plt.plot(x,y)
348 | # plt.show()
349 | # estad_st="Estadisticas de col2: " +str(self.df['col2'].describe())
350 | # self.resultado.setText(estad_st)
351 | #
352 | # def showCSV(self):
353 | # ###set tableView
354 | # rown=len(self.df.index)
355 | # coln=len(self.df.columns)
356 | # self.model = QStandardItemModel(rown,coln)
357 | # labels = list(self.df.columns.values)
358 | # #labels=['Index','Sample','Vector','Note','gRNA_PAM','start','end','Type','gene_name']
359 | # self.model.setHorizontalHeaderLabels(labels)
360 | # #下面代码让表格100填满窗口
361 | # # self.tableView_csv.horizontalHeader().setStretchLastSection(True)
362 | # # self.tableView_csv.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)
363 | #
364 | # for row in range(rown):
365 | # #print(self.df.loc[row].Sample)
366 | # for column in range(len(labels)):
367 | # item = QStandardItem(str(self.df.loc[row][labels[column]]))
368 | # self.model.setItem(row, column, item)
369 | # self.tableView_csv.setModel(self.model)
370 | # estad_st=str(self.df)
371 | # self.resultado.setText(estad_st)
372 | # def show_table(self):
373 | # #self.showtableWindow = QtWidgets.QDialog()
374 | # self.ui = showtable() ##打开showtable新窗口
375 | # self.ui.setuptable(self.df) ##传递倒入sample csv
376 | # self.ui.show() ##显示窗口
377 |
378 |
379 |
380 |
381 |
if __name__ == "__main__":
    # Script entry point: create the Qt application, show the main window,
    # run the event loop and exit with the code it returns.
    app = QtWidgets.QApplication(sys.argv)
    window = MyApp()
    window.show()
    exit_code = app.exec_()
    sys.exit(exit_code)
--------------------------------------------------------------------------------
/crisprmatch_running.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import sys
3 |
4 | from CMlib import bwa
5 | import os
6 | import os.path
7 | from CMlib import bwa_run
8 | from CMlib import mut_rate_filter
9 | from CMlib import output_aln_fa_filter
10 | from CMlib import plot_each_bam_filter
11 | from CMlib import Barplot_deletion_filter
12 | from subprocess import Popen
13 | from subprocess import PIPE
14 | import re
15 |
16 | from PyQt5 import uic,QtWidgets
17 | from PyQt5.QtGui import QStandardItemModel, QStandardItem
18 | from PyQt5.QtWidgets import QHeaderView, QPushButton,QProgressDialog
19 | from PyQt5.QtCore import Qt
20 |
21 |
22 | # pathdir = os.getcwd()
23 | # qtCreatorFile = os.path.join(pathdir,'CMlib/processing.ui')
24 | #
25 | # Ui_showtable, QtBaseClass = uic.loadUiType(qtCreatorFile)
26 |
27 | # class showtable(QtWidgets.QDialog, Ui_showtable):
28 | # def __init__(self):
29 | # QtWidgets.QDialog.__init__(self)
30 | # Ui_showtable.__init__(self)
31 | # self.setupUi(self)
32 | #
33 | # self.setWindowTitle('Project Running')
34 | # self.prosstext = str()
35 | # #self.closebtn.clicked.connect(self.sampleEdit)
36 | # print("Start running")
37 | # #self.show()
38 | #
39 | # def getdata(self,content):
40 | # self.prosstext += content
41 | # self.processinfo.setText(self.prosstext)
42 | #
43 | #
44 | #
45 | # def startrun(self,sample,gene,group,inputdir,tmpfile,result):
46 | # mainprogram(sample,gene,group,inputdir,tmpfile,result)
47 | #
48 | #
49 | # if __name__ == "__main__":
50 | # app = QtWidgets.QApplication(sys.argv)
51 | # window = showtable()
52 | # window.show()
53 | # sys.exit(app.exec_())
54 |
def mainprogram(sample, gene, group, inputdir, tmpfile, result):
    """Run every pipeline stage once, in order, printing progress to stdout.

    The arguments become the defaults of the command-line parser built by
    ``get_options`` and are then validated by ``check_options``.

    :param sample: sample information file (default of ``--input``)
    :param gene: fasta file (default of ``--genome``)
    :param group: group information file (default of ``--groupinfo``)
    :param inputdir: passed through unchanged as the last argument of
        ``bwa_run.prepare`` (presumably the folder holding the fastq files --
        confirm against ``CMlib/bwa_run.py``)
    :param tmpfile: temporary output folder (default of ``--save``)
    :param result: result output folder (default of ``--result``)
    :return: the string ``"Process Done!"``
    """
    args = check_options(get_options(sample, gene, group, tmpfile, result))

    # Name of the index file that would signal an already built bwa index.
    index_name = os.path.basename(args.genome)
    existing_index = os.path.join(args.saved, index_name + '.sa')
    # The interactive "rebuild the index?" prompt is disabled, so the index
    # is rebuilt on every run.
    rebuild_index = True
    print("bwabuild:", rebuild_index, "threads:", args.threads)

    # Step 1: bwa index
    showbarprocess("Start Running...")
    if not rebuild_index:
        print("Use", existing_index)
    else:
        bwa.bwaindex(args.bwa, args.genome, args.saved)
        print("## Step 1:")
        print("bwa index build finished ...")
    print("bwa index finshed!!")

    # Step 2: bwa alignment
    showbarprocess("Start mapping...")
    print("## Step 2:")
    print("loading fastq files...!")
    bwa_run.prepare(args.input, args.genome, args.saved, args.bwa,
                    args.samtools, args.picard, inputdir)
    print("bwa mem finished!")

    # Step 3: mutation rate calculation (filtered variant)
    showbarprocess("Start calculting...")
    print("## Step 3 update:")
    mut_rate_filter.rate_cal_filter(args.input, args.groupinfo, args.genome,
                                    args.result, args.saved)
    print("Mutation calculation finished!")

    # Step 4: alignment / fasta output (filtered variant)
    showbarprocess("Start statisitcs...")
    print("## Step 4 update:")
    output_aln_fa_filter.alnfile_filter(args.input, args.groupinfo, args.genome,
                                        args.result, args.saved)
    print("Alignment files were output!")

    # Step 5: per-bam bar charts (filtered variant)
    showbarprocess("Start plotting...")
    print("## Step 5 update:")
    print("Starting to plot each bam...")
    plot_each_bam_filter.barchart_filter(args.input, args.groupinfo, args.genome,
                                         args.result, args.saved)
    print("plot each bam finished!")

    # Per-group deletion size bar charts
    showbarprocess("Start outputting...")
    print("Starting to plot each group deletion size...")
    Barplot_deletion_filter.barchart_filter(args.groupinfo, args.result, args.saved)
    print("plot each group finished!")

    return "Process Done!"
def check_options(parser):
    """Parse the command line and validate tools, input file and output folders.

    For samtools, picard and bwa: when a path was given it must exist; when
    none was given the tool is looked up with ``which`` and ``args.<tool>`` is
    set to the first match. The genome file must exist. The temporary and
    result folders are created when missing. A summary is printed at the end.

    Any failed check prints a message and the parser help, then exits the
    process with status 1 (``SystemExit``).

    :param parser: ``argparse.ArgumentParser`` as built by ``get_options``;
        ``parse_args()`` is called without arguments, so ``sys.argv`` is read
    :return: the parsed (and completed) argument namespace
    """
    args = parser.parse_args()

    def fail(message):
        # Shared failure path: explain, show usage, terminate.
        print(message)
        parser.print_help()
        sys.exit(1)

    samtools_missing = "Can not locate samtools, please input full path of samtools\n"
    picard_missing = "Can not locate picard, please input full path of picard\n"
    bwa_missing = "Can not locate bwa, please input full path of bwa\n"

    # The samtools/picard versions are only probed when the tool is found via
    # PATH. Pre-binding them keeps the summary below from raising
    # UnboundLocalError when an explicit path was supplied.
    samtoolsversion = 'unknown'
    picardversion = 'unknown'

    # Start check samtools
    if args.samtools:
        if not os.path.exists(args.samtools):
            fail(samtools_missing)
    else:
        samtoolspath = which('samtools')
        if not samtoolspath:
            fail(samtools_missing)
        samtoolsversion = samtools('samtools')
        if samtoolsversion == 'None':
            fail(samtools_missing)
        args.samtools = samtoolspath[0]
    # End check samtools

    # Start check picard
    if args.picard:
        if not os.path.exists(args.picard):
            fail(picard_missing)
    else:
        picardpath = which('picard')
        if not picardpath:
            fail(picard_missing)
        picardversion = picard('picard')
        if picardversion == 'None':
            fail(picard_missing)
        args.picard = picardpath[0]
    # End check picard

    # Start check bwa
    if args.bwa:
        if not os.path.exists(args.bwa):
            fail(bwa_missing)
        bwaversion = bwa.bwaversion(args.bwa)
        if bwaversion == 'None':
            fail(bwa_missing)
    else:
        bwapath = which('bwa')
        if not bwapath:
            fail(bwa_missing)
        bwaversion = bwa.bwaversion(bwapath[0])
        if bwaversion == 'None':
            fail(bwa_missing)
        args.bwa = bwapath[0]
    # End check bwa

    if not os.path.exists(args.genome):
        fail("Can not locate genome file, please input genome file.\n")

    # Create the output folders when missing; makedirs also creates missing
    # parent folders, which a plain mkdir would fail on.
    if not os.path.exists(args.saved):
        os.makedirs(args.saved)
    if not os.path.exists(args.result):
        os.makedirs(args.result)

    # Print checked information
    print("#"*40)
    print("bwa version:", args.bwa, bwaversion)
    print("samtools version:", args.samtools, samtoolsversion)
    print("picard version:", args.picard, picardversion)
    print("genome file:", args.genome)
    print("tmp output folder:", os.path.realpath(args.saved))
    print("result output folder:", os.path.realpath(args.result))
    print("threads number:", args.threads)
    print("#"*40)
    return args
320 | # def check_options(parser):
321 | #
322 | # args = parser.parse_args()
323 | #
324 | # # Start check samtools
325 | # if args.samtools:
326 | #
327 | # if not os.path.exists(args.samtools):
328 | #
329 | # print("Can not locate samtools, please input full path of samtools\n")
330 | #
331 | # parser.print_help()
332 | #
333 | # sys.exit(1)
334 | #
335 | # else:
336 | #
337 | # samtoolspath = which('samtools')
338 | #
339 | # if samtoolspath:
340 | #
341 | # samtoolsversion=samtools('samtools')
342 | # if samtoolsversion == 'None':
343 | #
344 | # print("Can not locate samtools, please input full path of samtools\n")
345 | #
346 | # parser.print_help()
347 | #
348 | # sys.exit(1)
349 | #
350 | # else:
351 | #
352 | # args.samtools = samtoolspath[0]
353 | #
354 | # else:
355 | #
356 | # print("Can not locate samtools, please input full path of samtools\n")
357 | #
358 | # parser.print_help()
359 | #
360 | # sys.exit(1)
361 | #
362 | # # End check samtools
363 | #
364 | # # Start check picard
365 | # if args.picard:
366 | #
367 | # if not os.path.exists(args.picard):
368 | #
369 | # print("Can not locate picard, please input full path of picard\n")
370 | #
371 | # parser.print_help()
372 | #
373 | # sys.exit(1)
374 | #
375 | # else:
376 | #
377 | # picardpath = which('picard')
378 | #
379 | # if picardpath:
380 | #
381 | # picardversion=picard('picard')
382 | # if picardversion == 'None':
383 | #
384 | # print("Can not locate picard, please input full path of picard\n")
385 | #
386 | # parser.print_help()
387 | #
388 | # sys.exit(1)
389 | #
390 | # else:
391 | #
392 | # args.picard = picardpath[0]
393 | #
394 | # else:
395 | #
396 | # print("Can not locate picard, please input full path of picard\n")
397 | #
398 | # parser.print_help()
399 | #
400 | # sys.exit(1)
401 | #
402 | # # End check picard
403 | #
404 | # # Start check bwa
405 | # if args.bwa:
406 | #
407 | # if not os.path.exists(args.bwa):
408 | #
409 | # print("Can not locate bwa, please input full path of bwa\n")
410 | #
411 | # parser.print_help()
412 | #
413 | # sys.exit(1)
414 | #
415 | # bwaversion = bwa.bwaversion(args.bwa)
416 | #
417 | # if bwaversion == 'None':
418 | #
419 | # print("Can not locate bwa, please input full path of bwa\n")
420 | #
421 | # parser.print_help()
422 | #
423 | # sys.exit(1)
424 | #
425 | # else:
426 | #
427 | # bwapath = which('bwa')
428 | #
429 | # if bwapath:
430 | #
431 | # bwaversion = bwa.bwaversion(bwapath[0])
432 | #
433 | # if bwaversion == 'None':
434 | #
435 | # print("Can not locate bwa, please input full path of bwa\n")
436 | #
437 | # parser.print_help()
438 | #
439 | # sys.exit(1)
440 | #
441 | # else:
442 | #
443 | # args.bwa = bwapath[0]
444 | #
445 | # else:
446 | #
447 | # print("Can not locate bwa, please input full path of bwa\n")
448 | #
449 | # parser.print_help()
450 | #
451 | # sys.exit(1)
452 | #
453 | # # End check bwa
454 | #
455 | # if not os.path.exists(args.genome):
456 | #
457 | # print("Can not locate genome file, please input genome file.\n")
458 | #
459 | # parser.print_help()
460 | #
461 | # sys.exit(1)
462 | #
463 | # # Start check saved folder
464 | # if not os.path.exists(args.saved):
465 | #
466 | # os.mkdir(args.saved)
467 | #
468 | # #End check saved folder
469 | #
470 | # # Start check result folder
471 | # if not os.path.exists(args.result):
472 | # os.mkdir(args.result)
473 | #
474 | # # End check result folder
475 | #
476 | # # # Start check saved folder
477 | # # if os.path.exists(args.saved):
478 | # #
479 | # # if not args.docker:
480 | # #
481 | # # print(args.saved, "exists. Everything in this folder will be remove. Press Y or N to continue: ")
482 | # #
483 | # # while True:
484 | # #
485 | # # char = getch()
486 | # #
487 | # # if char.lower() in ("y", "n"):
488 | # #
489 | # # print(char)
490 | # #
491 | # # if char == 'n':
492 | # #
493 | # # sys.exit(1)
494 | # #
495 | # # break
496 | # #
497 | # # else:
498 | # #
499 | # # os.mkdir(args.saved)
500 | # # End check saved folder
501 | #
502 | # # Print Checked information
503 | # print("#"*40)
504 | #
505 | # print("bwa version:", args.bwa, bwaversion)
506 | #
507 | # print("samtools version:", args.samtools, samtoolsversion)
508 | #
509 | # print("picard version:", args.picard, picardversion)
510 | #
511 | # #print("jellyfish version:", args.jellyfish, jellyfishversion)
512 | #
513 | # print("genome file:", args.genome)
514 | #
515 | # #print("input file:", args.input)
516 | #
517 | # #print("5\' labeled R primer:", args.primer)
518 | #
519 | # print("tmp output folder:", os.path.realpath(args.saved))
520 | # print("result output folder:", os.path.realpath(args.result))
521 | #
522 | # print("threads number:", args.threads)
523 | #
524 | # #print("homology:", args.homology)
525 | #
526 | # #print("dtm:", args.dtm)
527 | #
528 | # print("#"*40)
529 | #
530 | # return args
def getch():
    """Read a single character from stdin with the terminal in raw mode.

    Intended for yes/no prompts. The terminal attributes that were active
    before the call are restored afterwards, even if reading fails.

    :return: the one-character string that was read
    """
    import sys
    import termios
    import tty

    stdin_fd = sys.stdin.fileno()
    saved_attrs = termios.tcgetattr(stdin_fd)
    try:
        tty.setraw(sys.stdin.fileno())
        key = sys.stdin.read(1)
    finally:
        # Always put the terminal back into its previous mode.
        termios.tcsetattr(stdin_fd, termios.TCSADRAIN, saved_attrs)
    return key
def which(filename):
    """Return all regular files named *filename* found in the PATH directories.

    :param filename: file name to join onto every PATH entry
    :return: list of matching paths in PATH order; empty when nothing matches
    """
    # Fall back to the platform default search path when PATH is unset;
    # os.environ.get("PATH") alone would return None and break .split().
    search_path = os.environ.get("PATH", os.defpath)
    candidates = (os.path.join(location, filename)
                  for location in search_path.split(os.pathsep))
    return [candidate for candidate in candidates if os.path.isfile(candidate)]
def samtools(filename):
    """Return the first line printed by ``<samtools> --version``.

    The executable is the first match of ``which(filename)``.

    :param filename: executable name to look up on PATH
    :return: first line of the tool's standard output, or the string
        ``'None'`` when the tool is not found, cannot be started or prints
        nothing (``check_options`` compares the result against ``'None'``)
    """
    samtoolspath = which(filename)
    if not samtoolspath:
        return 'None'

    # Argument list instead of a shell string, so a path containing spaces
    # is passed to the program intact.
    try:
        samtoolsrun = Popen([samtoolspath[0], '--version'], stdout=PIPE, stderr=PIPE)
    except OSError:
        return 'None'

    # communicate() drains both pipes, so the child cannot block on a full
    # stderr pipe while only stdout is being read.
    stdout_data, _ = samtoolsrun.communicate()
    output_lines = stdout_data.splitlines()
    if not output_lines:
        return 'None'
    return output_lines[0].decode('utf-8').rstrip('\n')
def picard(filename):
    """Return the version reported by ``<picard> ViewSam -h`` on stderr.

    The executable is the first match of ``which(filename)``. Every stderr
    line containing ``Version`` is inspected, and the text following that
    word (leading ``:`` and spaces removed) is taken as the version, so both
    ``Version: x`` and ``Version:x`` are accepted. Each match is printed.

    :param filename: executable name to look up on PATH
    :return: the version text of the last matching line, or the string
        ``'None'`` when the tool is not found, cannot be started or reports no
        version (``check_options`` compares the result against ``'None'``)
    """
    version = 'None'
    picardpath = which(filename)
    if not picardpath:
        return version

    # Argument list instead of a shell string, so a path containing spaces
    # is passed to the program intact.
    try:
        picardrun = Popen([picardpath[0], 'ViewSam', '-h'], stdout=PIPE, stderr=PIPE)
    except OSError:
        return version

    _, stderr_data = picardrun.communicate()
    for raw_line in stderr_data.splitlines():
        line = raw_line.decode('utf-8').rstrip('\n')
        if re.search('Version', line):
            # Unpacking ``line.split(' ')`` into two names raised ValueError
            # for any line that did not have exactly two tokens.
            reported = line.split('Version', 1)[1].lstrip(': ').strip()
            if reported:
                version = reported
            print(version)
    return version
def get_options(sample, gene, group, tmpfile, result):
    """Build the CRISPRMatch argument parser, using the arguments as defaults.

    :param sample: default for ``-i/--input`` (sample information file)
    :param gene: default for ``-g/--genome`` (fasta file)
    :param group: default for ``-gi/--groupinfo`` (group information file)
    :param tmpfile: default for ``-s/--save`` (stored as ``saved``)
    :param result: default for ``-r/--result``
    :return: the configured ``argparse.ArgumentParser`` (not yet parsed)
    """
    parser = argparse.ArgumentParser(prog='CRISPRMatch',
                                     description="CRISPRMatch is for location finding")
    parser.add_argument('--version', action='version', version='%(prog)s 1.0')

    # Optional explicit locations of the external tools.
    tool_options = (
        ('-b', '--bwa', 'bwa', 'bwa path'),
        ('-sm', '--samtools', 'samtools', 'samtools path'),
        ('-pi', '--picard', 'picard', 'picard path'),
    )
    for short_flag, long_flag, dest, help_text in tool_options:
        parser.add_argument(short_flag, long_flag, dest=dest, help=help_text)

    # File and folder options whose defaults come from the caller.
    path_options = (
        ('-g', '--genome', 'genome', 'fasta format genome file', gene),
        ('-i', '--input', 'input', 'sample information input file', sample),
        ('-gi', '--groupinfo', 'groupinfo', 'group information input file', group),
        ('-s', '--save', 'saved', 'tmp saved folder', tmpfile),
        ('-r', '--result', 'result', 'result saved folder', result),
    )
    for short_flag, long_flag, dest, help_text, default in path_options:
        parser.add_argument(short_flag, long_flag, dest=dest, help=help_text, default=default)

    parser.add_argument('-t', '--threads', dest='threads', type=int, default=1,
                        help='threads number or how may cpu you wanna use')
    parser.add_argument('--docker', default=False)
    return parser
606 |
def showbarprocess(content):
    """Show a window-modal progress dialog labelled *content* and sweep it to 100%.

    The dialog has no cancel button; its value is stepped from 0 up to the
    maximum and the dialog is then cancelled.

    :param content: label text displayed in the dialog
    """
    total_steps = int(100000)
    dialog = QProgressDialog()
    dialog.setWindowTitle("Processing ...")
    dialog.setLabelText(content)
    dialog.setCancelButton(None)  # do not show a cancel button
    dialog.setMinimumDuration(5)
    dialog.setWindowModality(Qt.WindowModal)
    dialog.setRange(0, total_steps)

    step = 0
    while step < total_steps:
        dialog.setValue(step)
        step += 1
    dialog.setValue(total_steps)

    dialog.cancel()
624 |
625 | # QtWidgets.QMessageBox.information(self, "提示", "操作成功")
626 | # self.showbarprocess()
627 |
628 | # i __name__ == "__main__":
629 | #
630 | # try:
631 | #
632 | # main()
633 | #
634 | # except KeyboardInterrupt:
635 | #
636 | # sys.stderr.write("User interrupt\n")
637 | #
638 | # sys.exit(0)
639 |
--------------------------------------------------------------------------------
/CMlib/mut_rate_filter.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pysam
3 | from pyfasta import Fasta
4 | import matplotlib
5 | from scipy import stats
6 | import matplotlib.pyplot as plt
7 | import pandas as pd
8 | import numpy as np
9 | import re
10 | from glob import glob
11 | from matplotlib.ticker import FormatStrFormatter
12 | from CMlib.showprocess import showbarprocess
13 | from PyQt5 import QtWidgets
14 |
15 | def rate_cal_filter(infofile, groupinfo, refname, output, bamdir):
16 | """
17 |
18 | :param infofile: a description file of details of each sample, example: sample_infor.txt
19 | :param groupinfo: a description file of details of each group, example: group_infor.txt
20 | :param refname: a fasta format of the sequence in the target region, exaple:Samples_gene.fa
21 | :param output: folder of final result
22 | :param bamdir: folder of temporary files
23 | :return:
24 | """
25 |
26 |
27 | info=pd.read_csv(infofile,index_col="Index")
28 | groupinfor = pd.read_csv(groupinfo)
29 | #print(groupinfor)
30 | #groupinfor = groupinfor.dropna(axis=0, how='any',thresh=7) ##过滤表哥中没填满的行,thresh=7表示至少7个数不是NA
31 | groupinfor.ix[:,pd.isnull(groupinfor).all()] = "UNKNOWN"
32 | groupinfor=groupinfor.fillna("UNKNOWN") ##填充表格中NaN处
33 | stranddict = dict()
34 | for idy in groupinfor.index:
35 | stranddict[groupinfor.loc[idy].rep1] = groupinfor.loc[idy].strand
36 | stranddict[groupinfor.loc[idy].rep2] = groupinfor.loc[idy].strand
37 | stranddict[groupinfor.loc[idy].rep3] = groupinfor.loc[idy].strand
38 | stranddict[groupinfor.loc[idy].control] = groupinfor.loc[idy].strand
39 |
40 | outputname = os.path.join(output, 'mut_rate.all.txt')
41 | outio = open(outputname, "w")
42 | outiofilter_file = os.path.join(output, 'filter_wt_reads_number.txt')
43 | outiofilter = open(outiofilter_file, 'w')
44 | print("Sample\tfilter", file=outiofilter)
45 | fa = Fasta(refname)
46 | print("start calculation!")
47 | #print("Sample\tmuation\treplace\tinsertion_only\tdeletion_only\tinsert&deletion", file=outio)
48 | print("Sample\tmuation\treplace\tinsertion_only\tdeletion_only\tinsert&deletion\tmuation_count\treplace_count\tinsertion_only_count\tdeletion_only_count\tinsert&deletion_count\ttotal_read_count", file=outio)
49 |
50 |
51 | for idx in info.index:
52 | # print(info.loc[idx].Note, info.loc[idx].gene_name, info.loc[idx].start, info.loc[idx].end)
53 |
54 | note = info.loc[idx].Note
55 |
56 | if note not in stranddict:
57 | error = ' '.join([note, 'is not involved in group table! Please Check!'])
58 | showwarnings("Error", error)
59 | continue
60 |
61 |
62 | bamname = os.path.join(bamdir, info.loc[idx].Note+'.bam')
63 | print("Calculating",bamname)
64 | strand = stranddict[note]
65 |
66 | tmp = ' '.join(['Calculating', info.loc[idx].Note])
67 | showbarprocess(tmp)
68 |
69 |
70 | if (re.search("gRNA", info.loc[idx].Note)): ##5'端延伸10,3'端延伸10
71 | if strand == '+':
72 | start = info.loc[idx]['start'] - 10
73 | end = info.loc[idx]['end'] + 10
74 |
75 | else:
76 | start = info.loc[idx]['start'] - 10
77 | end = info.loc[idx]['end'] + 10
78 |
79 | elif (re.search("crRNA", info.loc[idx].Note)): ##5'端延伸10,3'端延伸30
80 | if strand == '+':
81 | start = info.loc[idx]['start'] - 10
82 | end = info.loc[idx]['end'] + 30
83 |
84 | else:
85 | start = info.loc[idx]['start'] - 30
86 | end = info.loc[idx]['end'] + 10
87 |
88 |
89 | # if (re.search("gRNA", info.loc[idx].Note)):
90 | # start = info.loc[idx].start - 10
91 | # end = info.loc[idx].end + 10
92 | # # print(info.loc[idx].Note, "orignal-start",info.loc[idx].start, "after:",start, "orignal-end",info.loc[idx].end, "after:",end)
93 | # elif (re.search("crRNA", info.loc[idx].Note)):
94 | # start = info.loc[idx].start
95 | # end = info.loc[idx].end + 30
96 | # print(info.loc[idx].Note, "orignal-start",info.loc[idx].start, "after:",start, "orignal-end",info.loc[idx].end, "after:",end)
97 | # start = info.loc[idx].start
98 | # end = info.loc[idx].end
99 | gene = info.loc[idx].gene_name
100 | samfile = pysam.AlignmentFile(bamname, "rb", check_sq=False)
101 | #print(samfile.count())
102 | mtreads = set()
103 | totalcov = 0
104 | #covage = samfile.count()/100
105 | covage = set()
106 |
107 | replace = set()
108 |
109 | replace_left = set()
110 | all_tmp = set()
111 | wt_set = set()
112 | replace_side = set()
113 | wt_side_set = set()
114 | wt_final_set = set()
115 | total_read = set()
116 | filter_read = set()
117 | test = set()
118 |
119 | insert = set()
120 |
121 | deletion = set()
122 |
123 | mthreads_all = set()
124 |
125 | insert_deletion = set()
126 |
127 | insert_only = set()
128 |
129 | deletion_only = set()
130 |
131 | for pileupcolumn in samfile.pileup(gene, max_depth=50000):
132 |
133 | # print (pileupcolumn.pos, pileupcolumn.n) pos代表该位点的坐标,n代表它的coverage,pileups代表对应的reads
134 |
135 | totalcov += pileupcolumn.n
136 |
137 | if end >= pileupcolumn.pos >= start:
138 |
139 |
140 | for pileupread in pileupcolumn.pileups:
141 |
142 | covage.add(pileupread.alignment.query_name)
143 |
144 | if not pileupread.is_del and not pileupread.is_refskip:
145 | # print(pileupread.query_position)
146 | querybase = pileupread.alignment.query_sequence[pileupread.query_position]
147 |
148 | # refbase = pileupread.alignment.get_reference_sequence()[pileupread.query_position]
149 |
150 | refbase = fa[gene][pileupcolumn.pos].upper()
151 | all_tmp.add(pileupread.alignment.query_name)
152 |
153 | if querybase != refbase:
154 | # replace += 1
155 | mtreads.add(pileupread.alignment.query_name)
156 |
157 | replace.add(pileupread.alignment.query_name)
158 |
159 | # pileupread.indel: 在当前的pileup位点之后的位置的indel长度。如果下一个位点是insertion,indel>0;如果下一个位点是deletion,indel<0
160 | if pileupread.indel > 0:
161 | # insert += 1
162 | mtreads.add(pileupread.alignment.query_name)
163 | insert.add(pileupread.alignment.query_name)
164 | mthreads_all.add(pileupread.alignment.query_name)
165 |
166 | if pileupread.indel < 0:
167 | # deletion += 1
168 | mtreads.add(pileupread.alignment.query_name)
169 | deletion.add(pileupread.alignment.query_name)
170 | mthreads_all.add(pileupread.alignment.query_name)
171 |
172 | # print(pileupcolumn.pos, pileupcolumn.n, replace, insert, deletion)
173 | wt_set = all_tmp - replace
174 | for pileupcolumn_filter in samfile.pileup(gene, max_depth=50000):
175 |
176 | if start > pileupcolumn_filter.pos >= 0 or pileupcolumn_filter.pos > end:
177 | for pileupread_filter in pileupcolumn_filter.pileups:
178 | # for replace_filter in replace_all:
179 |
180 | # if replace_filter in str(pileupread_filter) :
181 | # replace_side.add(pileupread_filter.alignment.query_name)
182 | test.add(pileupread_filter.alignment.query_name)
183 | if pileupread_filter.alignment.query_name not in replace_left:
184 |
185 | if not pileupread_filter.is_del and not pileupread_filter.is_refskip:
186 | querybase_filter = pileupread_filter.alignment.query_sequence[
187 | pileupread_filter.query_position]
188 |
189 | # refbase = pileupread.alignment.get_reference_sequence()[pileupread_filter.query_position]
190 |
191 | refbase_filter = fa[gene][pileupcolumn_filter.pos].upper()
192 | replace_side.add(pileupread_filter.alignment.query_name) # 两边无突变
193 | if querybase_filter != refbase_filter:
194 | # replace += 1
195 | # mtreads.add(pileupread.alignment.query_name)
196 |
197 | # replace.add(pileupread.alignment.query_name)
198 | replace_left.add(pileupread_filter.alignment.query_name) # 两边无突变,有错配
199 | # break
200 |
201 | # print(len(test))
202 | # print(len(replace_side))
203 | # print(len(replace_left))
204 | wt_side_set = replace_side - replace_left
205 | wt_final_set = wt_side_set & wt_set
206 | #print(len(wt_set - wt_side_set))
207 |
208 | insert_deletion = insert & deletion
209 | insert_only = insert - deletion
210 | deletion_only = deletion - insert
211 | #mthreads_all = mtreads - replace
212 | # print(info.loc[idx].Note, end='\t', file=outio)
213 | # print(len(mtreads)/500, end='\t', file=outio)
214 | # print(len(replace)/500, end='\t', file=outio)
215 | # print(len(insert_only)/500, end='\t', file=outio)
216 | # print(len(deletion_only)/500, end='\t', file=outio)
217 | # print(len(insert_deletion)/500, end='\n', file=outio)
218 |
219 | #print(len(covage))
220 | total_read = mtreads | wt_final_set ##去合集
221 | #total_read = mtreads | wt_set ##去合集
222 | filter_read = covage - total_read
223 | print(info.loc[idx].Note, end='\t', file=outio)
224 | #print(len(mtreads)/len(covage)*100, end='\t', file=outio)
225 | print(len(mthreads_all)/len(total_read) * 100, end='\t', file=outio)
226 | print(len(replace)/len(total_read)*100, end='\t', file=outio)
227 | print(len(insert_only)/len(total_read)*100, end='\t', file=outio)
228 | print(len(deletion_only)/len(total_read)*100, end='\t', file=outio)
229 | print(len(insert_deletion)/len(total_read)*100, end='\t', file=outio)
230 |
231 | print(len(mthreads_all), end='\t', file=outio)
232 | print(len(replace), end='\t', file=outio)
233 | print(len(insert_only), end='\t', file=outio)
234 | print(len(deletion_only), end='\t', file=outio)
235 | print(len(insert_deletion), end='\t', file=outio)
236 | print(len(total_read), end='\n', file=outio)
237 |
238 | print(info.loc[idx].Note, end='\t', file=outiofilter)
239 | print(len(filter_read), end='\n', file=outiofilter)
240 | #print(len(filter_read))
241 |
242 | # print(info.loc[idx].Note.'\t'.len(mtreads)/500,len(replace)/500,'\t',len(insert_only)/500,'\t',len(deletion_only)/500,'\t',len(insert_deletion)/500, file=outio)
243 | # print(info.loc[idx].Note, len(mtreads)/500, len(replace)/500, len(insert)/500, len(deletion)/500 )
244 | samfile.close()
245 | outio.close()
246 |
def _replicate_stats(mut_result, present, column):
    """
    Summarise one mut_rate column over the replicates that were found.

    :param mut_result: dict mapping sample name -> row of mut_rate.all.txt
    :param present: names of the replicates that exist in mut_result
    :param column: positional index of the value inside each row
    :return: (mean, std); (0, 0) when no replicate is available so the group
             is still drawn as an empty bar
    """
    if not present:
        return 0, 0
    values = [mut_result[name][column] for name in present]
    # np.std of a single value is 0, so a lone replicate gets no error bar.
    return np.mean(values), np.std(values)


def display_filter(groupinfo, output):
    """
    Draw the treatment/control mutation summary of every group as a bar chart.

    Reads 'mut_rate.all.txt' from ``output``, averages the replicates of each
    group listed in ``groupinfo`` and saves the figure as 'mut_result.pdf' in
    ``output``. Replicates or controls that cannot be found are reported on
    stdout and plotted as 0 instead of aborting the whole plot.

    :param groupinfo: a description file of details of each group, example: group_infor.txt
    :param output: folder of final result
    :return: None
    """
    # Positional index of each plotted value inside a row of mut_rate.all.txt
    # (meaning taken from the names the values are plotted under).
    columns = {'mutation': 1, 'insert': 3, 'deletion': 4, 'insert_deletion': 5}
    # Left-to-right order of the bars inside one group, with their colours.
    plot_order = (('mutation', "#CC79A7"), ('deletion', "#D55E00"),
                  ('insert', "#0072B2"), ('insert_deletion', "#009E73"))
    legend_labels = ('mutation_all', 'deletion_only', 'insert_only', 'insert&&deletion')
    rep_labels = ('Rep1', 'Rep2', 'Rep3')

    mut_rate = pd.read_table(os.path.join(output, 'mut_rate.all.txt'), sep='\t')
    groupinfor = pd.read_csv(groupinfo)
    # Drop rows that are not filled in: keep only rows with at least 6 non-NA
    # cells. `how` must not be passed together with `thresh` (pandas raises
    # TypeError for that combination).
    groupinfor = groupinfor.dropna(axis=0, thresh=6)
    # Remaining empty cells become a placeholder that matches no sample name.
    groupinfor = groupinfor.fillna("UNKNOWN")

    # Sample name -> full row of mutation information.
    mut_result = dict(zip(mut_rate['Sample'], mut_rate.values))

    ## prepare for display
    treat_mean = {key: [] for key in columns}
    treat_err = {key: [] for key in columns}
    ck_rate = {key: [] for key in columns}
    glist = list()
    ck_glist = list()

    for _, row in groupinfor.iterrows():
        group = row['group']
        ck = row['control']
        replicates = (row['rep1'], row['rep2'], row['rep3'])
        present = [name for name in replicates if name in mut_result]
        missing = [label for label, name in zip(rep_labels, replicates)
                   if name not in mut_result]

        if len(missing) == 3:
            print("All repetitions in group:", group, " is missing.")
        elif len(missing) == 2:
            print("The group:", group, ": " + missing[0] + " and " + missing[1] + " are missing.")
        elif len(missing) == 1:
            print("The group:", group, ": " + missing[0] + " is missing.")

        for key, column in columns.items():
            mean, std = _replicate_stats(mut_result, present, column)
            treat_mean[key].append(mean)
            treat_err[key].append(std)

        if ck == 'UNKNOWN':
            print("The group:", group, ": CK is missing.")
            ck_row = None
        elif ck not in mut_result:
            # A control named in the group table but absent from the mutation
            # table used to raise KeyError; treat it like a missing control.
            print("The group:", group, ": CK", ck, "is not found in mut_rate.all.txt.")
            ck_row = None
        else:
            ck_row = mut_result[ck]
        for key, column in columns.items():
            ck_rate[key].append(0 if ck_row is None else ck_row[column])

        glist.append(group)
        ck_glist.append(ck)
    ## prepare for display

    ## print out pdf
    mutfile = os.path.join(output, 'mut_result.pdf')
    positions = groupinfor.index
    width = 0.15
    fig, (ax0, ax1) = plt.subplots(ncols=2, sharey=True)
    try:
        fig.set_size_inches(20, 9)
        # Set the tick-label format of the (shared) y axis.
        ax0.yaxis.set_major_formatter(FormatStrFormatter('%1.1f'))

        treat_bars = []
        ck_bars = []
        for slot, (key, color) in enumerate(plot_order):
            xs = positions + width * slot
            treat_bars.append(ax0.bar(xs, treat_mean[key], width, color=color))
            # Error bars are drawn separately so their style is independent
            # of the bar style.
            ax0.errorbar(xs, treat_mean[key], yerr=treat_err[key], fmt='', elinewidth=0.5,
                         capsize=2, capthick=0.5, ls='None', ecolor='black')
            ck_bars.append(ax1.bar(xs, ck_rate[key], width, color=color))

        panels = ((ax0, 'Treatment', glist, treat_bars),
                  (ax1, 'Control', ck_glist, ck_bars))
        for axis, title, tick_names, bars in panels:
            axis.set_title(title, size=15, fontdict={'family': 'Times New Roman'})
            axis.set_ylabel('All Mutation (%)', size=15, fontdict={'family': 'Times New Roman'})
            # Centre the tick under the four bars of a group.
            axis.set_xticks(positions + 1.5 * width)
            axis.set_xticklabels(tick_names, rotation=35, fontdict={'family': 'Arial'}, size=5)
            axis.legend([bar[0] for bar in bars], legend_labels)

        plt.savefig(mutfile, dpi=300, format="pdf")
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)
    ## print out pdf
449 |
450 | # ############## warning message #########
def showwarnings(title, message):
    """
    Show a warning message box with a single OK button.

    :param title: text used as the window title of the message box
    :param message: text displayed inside the message box
    :return: None
    """
    dialog = QtWidgets.QMessageBox()
    dialog.setWindowTitle(title)
    dialog.setText(message)
    dialog.setIcon(QtWidgets.QMessageBox.Warning)
    dialog.setStandardButtons(QtWidgets.QMessageBox.Ok)
    # exec_() displays the box and returns once the user dismisses it.
    dialog.exec_()
458 | ##################################################
--------------------------------------------------------------------------------