├── .DS_Store ├── .gitattributes ├── .gitignore ├── 01_Vmap1.1R ├── .DS_Store ├── 01_Xp-clr_Smooth_V1.r ├── 01_Xp-clr_Smooth_V2.r ├── 02_Xp-clr_SelectGene_V1.sh ├── 02_Xp-clr_SelectGene_V2.sh ├── 03_Xp-clr_BayenvOver.sh ├── 03_Xp-clr_Haplotype.sh ├── 03_Xp-clr_NegativeContral.r ├── 03_Xp-clr_to_jiao.r ├── 04_9VIP_Gene_Seq_anno.r ├── 04_9VIP_Xp-clr_Haplotype.sh ├── 04_GeneStructureMa.r ├── 04_GeneStructureMap.r ├── 04_Qmatrix_Visual.r ├── 04_VIP_Gene_XpCLR_signal.r ├── 05_Sample_Cluster.r ├── 06_Pheatmap.r ├── 06_Qmatrix_PieMap.r ├── 07_VCF_Haplotype_Visual.r ├── 08_Fst_density.r ├── 08_Fst_density.sh ├── 08_VIP_Gene_Heatmap.r ├── 09_RDA.r ├── 09_RDA_02(invalid).R ├── 10_Density.r ├── 11_Drift_pi.r ├── 12_FST.r ├── 13_IBS(invalid).r ├── 13_IBS2.r ├── 13_Xp-clr.sh ├── 14_PI.r ├── 15_PCA.r ├── 15_PCA_climate.r ├── 15_VRN.r ├── 16_Barcode.r ├── 17_CreateHmp.r ├── 18_Daf.r ├── 19_Enrich.r ├── 20_ExtractMap.r ├── 21_LDjump.r ├── 22_Maf.r ├── 23_Merge_DoneFile.r ├── 24_MergeTest.r ├── 25_MergeVCF.r ├── 26_Migration.r ├── 27_Permut.r ├── 28_Pluk.r ├── 29_Statistic.r ├── 30_Wenn.r ├── 31_WheatMap.r ├── 32_NeSize.r ├── 33_vf_pipe.sh ├── 34_Vmap3.sh ├── 35_ChrBin.R ├── 35_Storage.sh ├── 36_LDJump.R ├── 37_permut.R ├── 38_Venn.R ├── 39_42-21chr.sh ├── 40_MDS.r ├── 41_Method1.sh ├── 42_MCMC.sh ├── 43_est.sh ├── 44_Bayenv.r ├── 45_Admixture.sh ├── 46_VMap3流程.sh ├── 47_TestGF.r ├── 49_Variants_Age.sh ├── 50_LFMM.r ├── 51_105XD.r ├── 52_37gene_snpEff.r ├── 53_37gene_haplotype.r ├── 55_VMap3_siteQC.r ├── 56_VMap3_taxaQC.r ├── 57_Gene_HaploType.r ├── 57_Gene_HaploType_Map.r ├── 58_PPD.r ├── 59_Depth.R ├── 60_response.R ├── LICENSE ├── Scripts.Rproj ├── assets │ └── 16442473466436.jpg └── 可能有用的smcpp.R ├── 02_Vmap3R ├── .DS_Store ├── 01_Tree_Anno.r ├── 02_Enrich.r ├── 03_Filt_vcf.r ├── 04_IBS.r ├── 05_Map.r ├── 06_Map2.r ├── 07_Nucdiff.sh ├── 08_PCA.r ├── 09_PCA2.r ├── 10_Maf-Dis.r ├── 11_Statistic.sh ├── 12_VcfQc.r ├── 13_VcfStatistic.r └── 14_Snp_Density.sh ├── 03_Analysis ├── .DS_Store ├── 01_ChangeFileFormat_1.r ├── 02_ChangeFileFormat_2.r ├── 03_Basic_Statistic.sh ├── 04_Basic_Statistic_2.r ├── 05_FST_Calculate.r ├── 06_SeperationSite_Calculate.r ├── 07_TajimasD_Calculate.sh ├── 08_PI_Calculate.sh ├── 09_IBS.r ├── 09_IBS_Calculate.sh ├── 10_IceData.r ├── 11_ReadDepth.sh └── 12_BWA.sh ├── 04_GWAS ├── .DS_Store ├── 01_WEGA_gwas.sh ├── 02_Manhattan.r ├── 03_Gene_Domain.r ├── 04_Gene_trait.r ├── 05_Plot_Gene_Trait.r ├── 06_Plot_Rht.r ├── 08_Correlation.r └── 09_envGWAS.r ├── 05_Plot ├── Plot_Density.r ├── Plot_Drift_Pi.r ├── Plot_Finder.r ├── Plot_Fst.r ├── Plot_Haplotype.r ├── Plot_IBS.r ├── Plot_MapPie.r ├── Plot_Pi.r ├── Plot_Pi_02.r └── Plot_Tajima'D.r ├── 06_MarkDown ├── ASMC.md ├── GATK.md ├── MCMC.md ├── Wheat migration routes.md └── bayenv_method.md ├── 07_VMap3 ├── .DS_Store ├── 01_Map.r ├── 02_ASMC.r ├── 03_FastSMC.r ├── 04_BasicStatistics.r ├── 05_TasselGWAS.sh ├── 06_Selscan.r ├── 07_scam.r ├── 08_Raxml-ng.r ├── 09_WheatGo.r ├── 10_SDS.r ├── 11_PiRatio.r ├── 12_Selection.R ├── 13_Baypass.r ├── 14_snpEff.R ├── 15_Rareallele.R ├── 16_Network.R ├── 17_N-content.R ├── 18_Haplotype.R ├── 19_lm.R ├── 20_fst-piR.R ├── 20_smcpp.R ├── 21_sweed.R ├── 22_recombinationRate.R ├── 23_clustersample.R ├── 24_h12.R ├── 25_ehh.R ├── 26_ppi.R ├── 27_xpclr.R ├── 28_envgwas.R ├── 29_correlation.R ├── 30_polygraph.R ├── 31_gradientForest.R ├── 32_coFu.Rmd ├── 33_环境变量降维.r ├── 34_adapgene.R ├── 35_表型处理.R ├── 36_checksample.R ├── 37_traitgwas.R ├── 38_bayesR.R ├── 39_coXiao.R ├── 40_环境适应性位点.R ├── 41_表型环境关联.R ├── 
42_群体结构分析.R ├── 43_RDA.R ├── 44_polygenic_adaptation.R ├── 45_partialRda.R ├── 46_fastcall2测试.R ├── 47-物种分布模型.R ├── 48_lassip.R ├── 49_RAiSD.R ├── 50_riskscore.R ├── 51_LinkNe.R ├── 52_JM44.R ├── 53_coTian.R ├── 54_density.R ├── Lulab_germplasm_Info.xlsx ├── Lulab_germplasm_Storage.xlsx ├── VMap3.info ├── VMap3.info2 ├── VMap3_382landrace_82clim_PC10.txt ├── VMap3_523landrace_climPC.txt ├── VMap3_523landrace_withBioclimate.txt └── vmap3.Rmd ├── BSAseq ├── BSA-seq混合分组鉴定功能基因.md ├── BSAseq.R ├── QTLseq_BSA.R ├── gatk_step1_Index.sh ├── gatk_step2_callvar.sh ├── gatk_step3_gvcfmerg.sh └── gatk_step4_filter_SNP.sh ├── Basic_Path.sh ├── Basic_R.r ├── Basic_Shell.sh ├── Code ├── .DS_Store ├── 01_Density.r ├── 02_IBS.r ├── 03_PI.r ├── 04_FST.r ├── 05_Drift_pi.r ├── 06_MDS.r ├── 07_PCA.r ├── 08_ExtractMap.r ├── 09_Admixture.sh ├── 10_Xp-clr.sh ├── 11_Xp-clr_Smooth.r ├── 12_Xp-clr_SelectGene.sh ├── 13_Xp-clr_BayenvOver.sh ├── 14_Xp-clr_Haplotype.sh ├── 15_Xp-clr_NegativeContral.r ├── 16_RDA.r ├── 17_Sample_Cluster.r ├── 18_Pheatmap.r ├── 19_Qmatrix_PieMap.r ├── 20_Qmatrix_Visual.r ├── 21_PCA_climate.r ├── 22_VIP_Gene_XpCLR_signal.r ├── 23_Gene_HaploType_Map.r ├── 24_Gene_HaploType.r ├── 25_Gene_snpEff.r ├── 26_envGWAS.r ├── 27_PPD.r ├── 28_Bayenv.r └── readme.txt ├── README.md ├── R_Code.Rproj └── 小麦环境适应性 ├── 01-环境变量聚类.R ├── 02-位点统计.R ├── 03-位点单倍型分析.R ├── 04-样本分群及遗传分析.R ├── 05_选择信号分析.R ├── 06-snpAttr.R └── 07_growthHabit.R /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yafei-Guo-coder/DumpNice/183d85aba75932aeacf7484386eb0cae484c7d9b/.DS_Store -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | 8 | # Example code in package build process 9 | *-Ex.R 10 | 11 | # Output files from R CMD build 12 | /*.tar.gz 13 | 14 | # Output files from R CMD check 15 | /*.Rcheck/ 16 | 17 | # RStudio files 18 | .Rproj.user/ 19 | 20 | # produced vignettes 21 | vignettes/*.html 22 | vignettes/*.pdf 23 | 24 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 25 | .httr-oauth 26 | 27 | # knitr and R markdown default cache directories 28 | /*_cache/ 29 | /cache/ 30 | 31 | # Temporary files created by R markdown 32 | *.utf8.md 33 | *.knit.md 34 | 35 | # Shiny token, see https://shiny.rstudio.com/articles/shinyapps.html 36 | rsconnect/ 37 | -------------------------------------------------------------------------------- /01_Vmap1.1R/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yafei-Guo-coder/DumpNice/183d85aba75932aeacf7484386eb0cae484c7d9b/01_Vmap1.1R/.DS_Store -------------------------------------------------------------------------------- /01_Vmap1.1R/03_Xp-clr_Haplotype.sh: -------------------------------------------------------------------------------- 1 | #working directory 2 | #204:yafei:/data2/yafei/003_Project3/Vmap1.1/E6/Xp-CLR_V2 3 | 4 | #extract gene_region VCF from 225 all vcf files 5 | grep -w -f 87Gene_id.txt gene_v1.1_Lulab.gff3 | awk -F";" '{print $1}' |awk '{print 
$1"\t"$4-5000"\t"$5+5000"\t"$9}' | awk -F"\tID=" '{print $2"\t"$1}' > 87gene_5k.txt 6 | #bash getVcf.sh 87gene_5k.txt 7 | cat $1 |while read gene chr from to 8 | do 9 | #echo $chr $from $to 10 | #if echo $2 |grep -q '.*.vcf.gz$';then 11 | # vcftools --gzvcf $2 --chr $chr --from-bp $from --to-bp $to --recode --recode-INFO-all --out $gene.$chr.$from-$to 12 | #elif echo $2 |grep -q '.*.vcf$';then 13 | vcftools --gzvcf /data2/yafei/003_Project3/Vmap1.1/E6/Landrace_locate_225/chr$chr.E6_Landrace_locate.vcf.gz --chr $chr --from-bp $from --to-bp $to --recode --recode-INFO-all --out 5k.$gene.$chr.$from-$to 14 | #fi 15 | done 16 | 17 | #传到本地用Java Migration/Haplotype将vcf转换成单倍型txt格式,使用07_VCF_Haplotype_Visual.r进行可视化。 18 | #本地路径:/Users/guoyafei/Documents/01_个人项目/02_Migration/02_数据表格/01_Vmap1-1/01_Add_ZNdata/05_Environment/XP-CLR/Gene/V2/TXT -------------------------------------------------------------------------------- /01_Vmap1.1R/03_Xp-clr_NegativeContral.r: -------------------------------------------------------------------------------- 1 | #XP-CLR negative contral 2 | library(RColorBrewer) 3 | library(ggplot2) 4 | setwd("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/02_Environment/02_XP-CLR/NegativeContral") 5 | dist <- read.table("dis_matrix.txt",header=T,stringsAsFactors = F) 6 | out <- strsplit(dist[,1], ":") 7 | dist$ID1 <- NA 8 | dist$ID2 <- NA 9 | a<-1 10 | for (i in c(1:length(out))){ 11 | dist[a,6] <- out[[i]][1] 12 | dist[a,7] <- out[[i]][2] 13 | a <- a+1 14 | } 15 | #dim(dist[which(dist$geopolitical_dis>100 & dist$TEMP_dis<10),]) 16 | WA <- read.table("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/02_Environment/02_XP-CLR/NegativeContral/group_V2/WA.txt",header =F,stringsAsFactors = F) 17 | North1 <- read.table("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/02_Environment/02_XP-CLR/NegativeContral/group_V2/North1.txt",header=F,stringsAsFactors = F) 18 | North2 <- read.table("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/02_Environment/02_XP-CLR/NegativeContral/group_V2/North2.txt",header=F,stringsAsFactors = F) 19 | #Temperature 20 | pdf("Temperature_dist.pdf",height = 10,width = 10) 21 | #WA VS North1 22 | dist$Type1 <- NA 23 | dist[which((dist$ID1 %in% WA[,1] & dist$ID2 %in% North1[,1])) ,8] <- "Yes" 24 | dist[which((dist$ID1 %in% North1[,1] & dist$ID2 %in% WA[,1])) ,8] <- "Yes" 25 | p <- ggplot(dist, aes(geopolitical_dis,TEMP_dis, color=Type1)) + 26 | geom_point( size=3,alpha=0.2)+ 27 | theme_classic()+ 28 | ggtitle("WA VS North1")+ 29 | theme(plot.title = element_text(color="red", size=20, face="bold.italic"),legend.text = element_text(size=20),legend.title=element_blank(),axis.text.x = element_text(size = 25), axis.title.x = element_text(size = 25),axis.text.y = element_text(size = 25),axis.title.y = element_text(size = 25)) 30 | print(p) 31 | #WA VS North2 32 | dist$Type1 <- NA 33 | dist[which((dist$ID1 %in% WA[,1] & dist$ID2 %in% North2[,1])) ,8] <- "Yes" 34 | dist[which((dist$ID1 %in% North2[,1] & dist$ID2 %in% WA[,1])) ,8] <- "Yes" 35 | 36 | p <- ggplot(dist, aes(geopolitical_dis,TEMP_dis, color=Type1)) + 37 | geom_point( size=3,alpha=0.2)+ 38 | theme_classic()+ 39 | ggtitle("WA VS North2")+ 40 | theme(plot.title = element_text(color="red", size=20, face="bold.italic"),legend.text = element_text(size=20),legend.title=element_blank(),axis.text.x = element_text(size = 25), axis.title.x = element_text(size = 25),axis.text.y = element_text(size = 25),axis.title.y = element_text(size = 25)) 41 | 42 | print(p) 43 | 44 | 
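# NOTE (added sketch, not part of the original script): the pairwise comparison
# blocks in this file repeat the same flag-and-plot pattern for WA/North1,
# WA/North2 and North1/North2. A minimal helper, assuming grp1/grp2 are data
# frames whose first column holds taxa IDs (as WA, North1, North2 above):
flag_pairs <- function(dist, grp1, grp2) {
  dist$Type1 <- NA
  hit <- (dist$ID1 %in% grp1[,1] & dist$ID2 %in% grp2[,1]) |
         (dist$ID1 %in% grp2[,1] & dist$ID2 %in% grp1[,1])
  dist$Type1[hit] <- "Yes"
  dist
}
# e.g. dist <- flag_pairs(dist, North1, North2) before building the ggplot below.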
#North1 VS North2 45 | dist$Type1 <- NA 46 | dist[which((dist$ID1 %in% North2[,1] & dist$ID2 %in% North1[,1])),8] <- "Yes" 47 | dist[which((dist$ID1 %in% North1[,1] & dist$ID2 %in% North2[,1])),8] <- "Yes" 48 | 49 | p <- ggplot(dist, aes(geopolitical_dis,TEMP_dis, color=Type1)) + 50 | geom_point( size=3,alpha=0.2)+ 51 | theme_classic()+ 52 | ggtitle("North1 VS North2")+ 53 | theme(plot.title = element_text(color="red", size=20, face="bold.italic"),legend.text = element_text(size=20),legend.title=element_blank(),axis.text.x = element_text(size = 25), axis.title.x = element_text(size = 25),axis.text.y = element_text(size = 25),axis.title.y = element_text(size = 25)) 54 | print(p) 55 | dev.off() 56 | -------------------------------------------------------------------------------- /01_Vmap1.1R/04_9VIP_Xp-clr_Haplotype.sh: -------------------------------------------------------------------------------- 1 | #具体分析一些样本的单倍型,总共分成3类 2 | #1. 保护祖先种的意义: 祖先种中存在的变异,在传播的过程中丢失。 3 | #eg(抗病): Sr33(TraesCS1D02G029100), Sr45(TraesCS1D02G040400), Tsn1(TraesCS5B02G059000) 4 | #2. 保护地区特有品种的意义: 祖先种中没有,在传播的过程中适应区域内环境,形成新的变异。 5 | #eg(氮吸收,株高): NGR5_1(TraesCS1A02G242800), GID-A1(TraesCS1A02G255100), Rht-A1(TraesCS4A02G271000), Rht-B1(TraesCS4B02G043100) 6 | #3. 单倍型在区域变化的例子。 7 | #eg(开花,光敏): TaFT8-2_1(TraesCS2A02G485000), TaPHYC_1(TraesCS5A02G391300) 8 | 9 | #working directory 10 | 204:yafei:/data2/yafei/003_Project3/Vmap1.1/E6/Xp-CLR_V2/VIP_genes 11 | #提取祖先的对应位点vcf 12 | #VIP_gene.txt(Old) 13 | TraesCS1D02G029100 14 | TraesCS1D02G040400 15 | TraesCS5B02G059000 16 | TraesCS1A02G242800 17 | TraesCS1A02G255100 18 | TraesCS4A02G271000 19 | TraesCS4B02G043100 20 | TraesCS2A02G485000 21 | TraesCS5A02G391300 22 | #VIP_gene.txt(New) 23 | Ppd-A1 24 | Ppd-1 25 | VRN2-2 26 | 27 | #提取带祖先的样本 28 | grep -w -f VIP_gene.txt 87gene_5k.txt > VIPgene_5k.txt 29 | bash getVcf.sh VIPgene_5k.txt > VIPgene_5k.log 30 | #提取在六倍体里面分离的位点 31 | 32 | for i in `cat VIP_gene.txt`; do sed '/#/d' *$i*.vcf | awk '{print $1"\t"$2}'; done > VIP_gene.pos 33 | 34 | for i in `ls *vcf`; do vcftools --vcf $i --positions VIP_gene.pos --recode --out ${i::-11}.pos; done 35 | #1.传到本地用Java Migration/Haplotype将vcf转换成单倍型txt格式,使用07_VCF_Haplotype_Visual.r进行可视化。 36 | #本地路径:/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Gene/VIP_gene/ 37 | #2.使用04_9VIP_Gene_Seq_anno.r进行变异注释。 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /01_Vmap1.1R/04_GeneStructureMa.r: -------------------------------------------------------------------------------- 1 | #Ppd-1和VRN基因随纬度变化的structure分布 2 | #Working diirectory: 203:yafei:/data1/home/yafei/008_Software/snpEff/Xp-clr_6VIP 3 | library(pheatmap) 4 | require(reshape) 5 | require (rworldmap) 6 | require(rworldxtra) 7 | library(pophelper) 8 | library(ggplot2) 9 | require(gridExtra) 10 | setwd("/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Gene/V5/Structure") 11 | #load Qmatrix files 12 | #Ppd-1 13 | sfiles <- list.files(path="/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Gene/V5/Structure/Ppd-1", full.names=T) 14 | slist <- readQ(files=sfiles) 15 | tabulateQ(qlist=readQ(sfiles)) 16 | summariseQ(tabulateQ(qlist=readQ(sfiles))) 17 | #VRN2-2 18 | sfiles <- list.files(path="/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Gene/V5/Structure/VRN2-2", full.names=T) 19 | slist <- readQ(files=sfiles) 20 | tabulateQ(qlist=readQ(sfiles)) 21 | summariseQ(tabulateQ(qlist=readQ(sfiles))) 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 
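# NOTE (added sketch, not part of the original script): the Q matrices above are
# only loaded and summarised here. A minimal barplot sketch reusing the plotQ()
# style of 04_Qmatrix_Visual.r; the output file name is assumed, and clustercol
# must supply at least K colours (K <= 8 assumed):
library(RColorBrewer)
pdf("Qmatrix_structure.pdf", width = 24, height = 8)
p1 <- plotQ(slist, returnplot = TRUE, exportplot = FALSE, quiet = TRUE, basesize = 11,
            sortind = "all", showindlab = FALSE, sharedindlab = FALSE,
            clustercol = brewer.pal(8, "Set2"))
grid.arrange(grobs = p1$plot, nrow = length(p1$plot))
dev.off()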
-------------------------------------------------------------------------------- /01_Vmap1.1R/04_Qmatrix_Visual.r: -------------------------------------------------------------------------------- 1 | #统一设置:注释内容及颜色 2 | library(pheatmap) 3 | require(reshape) 4 | require (rworldmap) 5 | require(rworldxtra) 6 | library(pophelper) 7 | library(ggplot2) 8 | require(gridExtra) 9 | setwd("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/01_BasicStatistic/06_Structure/V1/Select") 10 | #load Qmatrix files 11 | sfiles <- list.files(path="/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/01_BasicStatistic/06_Structure/V1/Select/QMatrix", full.names=T) 12 | slist <- readQ(files=sfiles) 13 | tabulateQ(qlist=readQ(sfiles)) 14 | summariseQ(tabulateQ(qlist=readQ(sfiles))) 15 | 16 | pdf("Q.pdf",width = 24,height = 8) 17 | p1 <- plotQ(slist[1:7],returnplot=T,exportplot=F,quiet=T,basesize=11, 18 | sortind="all",showindlab=F,showyaxis=T,showticks=T,sharedindlab=T,clustercol=brewer.pal(7, "Set2")) 19 | grid.arrange(p1$plot[[1]],p1$plot[[2]],p1$plot[[3]],p1$plot[[4]],p1$plot[[5]],nrow=5) 20 | dev.off() 21 | 22 | library(readxl) 23 | library(ggmap) 24 | library(RColorBrewer) 25 | setwd("/Users/guoyafei/Documents/02_VmapIII/06_Selection/Fst/") 26 | group <- read.table("group.txt",header=F,stringsAsFactors = F) 27 | a <- read_excel("/Users/guoyafei/RstudioProjects/GitHub/R_Code/07_VMap3/Lulab_germplasm_Info.xlsx", sheet=1, na='NA') 28 | b<-a[which(a$`Taxa(vmap3)` %in% group$V3),c(1,11,12)] 29 | rownames(group)<-group$V3 30 | c <- cbind(b,group[b$`Taxa(vmap3)`,]) 31 | 32 | mp <- NULL 33 | mapworld <- borders("world",colour = "gray70",fill="white") 34 | mp <- ggplot() + 35 | mapworld + 36 | #xlim(0,90) + 37 | #ylim(10,70) + 38 | theme_classic() 39 | color <- brewer.pal(8, "Dark2")[c(1,2,3,4,5,6,7)] 40 | color <- brewer.pal(8, "Dark2")[1:7] 41 | data <- c[which(c$V1=="domemmer"),] 42 | data <- c[which(c$V1=="freethresh"),] 43 | data <- c[which(c$V1=="wildemmer"),] 44 | #AABBDD 45 | sub <- data[which(data$Region %in% c("EU","WA","EA2","SH","IA")),] 46 | mp+geom_point(aes(x=sub$Longitude, y=sub$Latitude, color=sub$Region,alpha=0.3),size=1)+ 47 | scale_size(range=c(1,1)) + 48 | scale_color_manual(values = color) + 49 | #theme_classic() + 50 | #theme(axis.text.x = element_text(size = 20),axis.title.x = element_text(size = 20),axis.text.y = element_text(size = 20),axis.title.y = element_text(size = 20))+ 51 | #scale_fill_manual(values=c("#97FFFF", "#FFD700", "#FF6347", "#8470FF","#D8BFD8"), 52 | # breaks=c("AA", "SS", "DD", "AABB","AABBDD"), 53 | # labels=c("AA", "SS", "DD", "AABB","AABBDD"))+ 54 | theme(axis.text = element_blank(), 55 | axis.ticks = element_blank(), 56 | axis.title = element_blank(), 57 | panel.border = element_blank()) 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /01_Vmap1.1R/04_VIP_Gene_XpCLR_signal.r: -------------------------------------------------------------------------------- 1 | library(qqman) 2 | #画信号位点基因的XP-CLR的染色体信号 3 | #Working directory 4 | #xuebo@204:/data2/xuebo/Projects/Speciation/xpclr/Selection_V3/smooth/lineage_V2/Top5%/VIP_gene_XpCLR 5 | #VIP_gene 6 | #1. TraesCS4D02G364400 Vrn2-2 24 58277633 58279728 chr4D:509282253-509284348(-) Transcription factor GHD7 [UniProtKB/Swiss-Prot:E5RQA1] Os10g0560400 NA 7 | #2. TraesCS2D02G079600 Ppd-1(PRR) 11 33952048 33956269 chr2D:33952048-33956269(-) Two-component response regulator-like PRR37 [UniProtKB/Swiss-Prot:A2YQ93] Os07g0695100 NA 8 | #3. 
TraesCS2A02G081900 Ppd-A1 7 36933684 36938202 chr2A:36933684-36938202(-) Two-component response regulator-like PRR37 [UniProtKB/Swiss-Prot:A2YQ93] Os07g0695100 NA 9 | #4. TraesCS5A02G473800 Q-5A 26 196896739 196900381 chr5A:650127258-650130900(-) APETALA2-like protein 2 [UniProtKB/Swiss-Prot:Q84TB5] Os03g0818800 NA 10 | #5. TraesCS4B02G043100 Rht-B1 21 30861268 30863723 chr4B:30861268-30863723(+) DELLA protein RHT-1 [UniProtKB/Swiss-Prot:Q9ST59] Os03g0707600 AT1G66350 11 | #6. TraesCS1D02G029100 Sr33 5 11451423 11459353 chr1D:11451423-11459353(+) Disease resistance protein RGA5 [UniProtKB/Swiss-Prot:F7J0N2] NA AT3G46530 12 | #7. TraesCS1D02G040400 Sr45 5 19341296 19346065 chr1D:19341296-19346065(+) Putative disease resistance protein RGA4 [UniProtKB/Swiss-Prot:Q7XA39] Os10g0130800 N 13 | 14 | #File Gene_Name Chr Start Stop 15 | #North2_South_smooth_A Ppd-A1 7 36933684 36938202 16 | #North2_South_smooth_B Rht-B1 21 30861268 30863723 17 | #North2_South_smooth_D Sr45 5 19341296 19346065 18 | #WA_EU_smooth_A Q-5A 26 196896739 196900381 19 | #WA_EU_smooth_D Sr45 5 19341296 19346065 20 | #EU_South_smooth_B Rht-B1 21 30861268 30863723 21 | #EU_South_smooth_D Flowering1 24 58277633 58279728 22 | #EU_South_smooth_D Sr45 5 19341296 19346065 23 | #WA_South_smooth_D Flowering1 24 58277633 58279728 24 | #WA_South_smooth_D Sr33 5 11451423 11459353 25 | #Tibet_South_smooth_D Ppd-1(PRR) 11 33952048 33956269 26 | 27 | cat $1 |while read file chr1 chr2 28 | do 29 | awk '{if($1== "'${chr1}'" || $1=="'${chr2}'") {print $0}}' ../../${file}| sort -k1,1n -k2,2g |sed '1i Chr\tWindowStart\tWindowStop\tSNPcount\tMeanY\tWstat' > ${file::-4}.${chr1}.${chr2}.txt 30 | done 31 | 32 | #把42条染色体合并成21条 33 | #转移到本地画图 34 | #/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Xpclr/V3 35 | setwd("/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Xpclr/V3") 36 | #批量读取Xp-clr结果文件 37 | path <- "/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Xpclr/V3/TXT" ##文件目录 38 | fileNames <- dir(path) ##获取该路径下的文件名 39 | filePath <- sapply(fileNames, function(x){ 40 | paste(path,x,sep='/')}) ##生成读取文件路径 41 | data <- lapply(filePath, function(x){ 42 | read.table(x, header=T,stringsAsFactors = F)}) 43 | 44 | #读取阈值文件 45 | thresHold <- read.table("thresHold.txt",header=T,stringsAsFactors = F) 46 | rownames(thresHold) <- thresHold$GEO_region 47 | #读取着丝粒文件 48 | centromer <- read.table("centromerer.txt",header=T,stringsAsFactors = F) 49 | rownames(centromer) <- centromer$chr 50 | 51 | #读取VIP基因文件 52 | gene <- read.table("VIP_gene.txt",header=T,stringsAsFactors = F) 53 | rownames(gene) <- paste(gene$File,gene$Position,sep=".") 54 | out1 <- strsplit(sub('.chr',':chr', names(data)), ":") 55 | out2 <- strsplit(sub('.txt',':txt', names(data)), ":") 56 | 57 | pdf("Xp-clr.pdf",width = 10,height = 5) 58 | for(i in c(1:length(data))){ 59 | name <- out1[[i]][1] 60 | tit <- out2[[i]][1] 61 | chr <- strsplit(out1[[i]][2], ".txt")[[1]][1] 62 | title <- paste(gene[tit,1],gene[tit,2],sep=":") 63 | cen <- centromer[chr,4] 64 | plot(data[[i]]$WindowStart/1000000, data[[i]]$MeanY, ylim = c(-2,50),axes = T,col = "skyblue",type="l",cex = 2,lwd = 3,ann = F) 65 | abline(v = gene[tit,7]/1000000, col = "red", cex=2,lwd = 1) 66 | abline(h=thresHold[name,3], col = "red", lty = 3,cex=2,lwd = 2) 67 | title(title,xlab= chr, ylab = 'Xp-clr', font.lab = 1, cex.lab = 1.5) 68 | points(cen/1000000,0, pch=20,cex=2,col="grey") 69 | } 70 | dev.off() 71 | -------------------------------------------------------------------------------- 
/01_Vmap1.1R/05_Sample_Cluster.r: -------------------------------------------------------------------------------- 1 | library(cluster) 2 | library(factoextra) 3 | #聚类 4 | #根据经纬度给样本聚类 5 | #data <- read.table("land.txt", header=T, sep="\t", stringsAsFactors = F) 6 | #dataA <- data[,c(3,4)] 7 | #data2 <- dataA[!is.na(dataA$Latitude),] 8 | 9 | data <- read.table("/Users/guoyafei/Documents/01_Migration/02_Environment/04_bayenv/V1/225env.txt", header=F,stringsAsFactors = F) 10 | colname <- c("elevation","temp1","temp2","temp3","temp4","temp5","temp6","temp7","temp8","temp9","temp10","temp11","prec1","prec2","prec3","prec4","prec5","prec6","prec7","prec8","Latitude","Logititude") 11 | rownames(data) <- data[,1] 12 | data2 <- data[!is.na(data$V6),-1] 13 | colnames(data2) <- colname 14 | df = scale(data2,center = T,scale = T) 15 | colnames(df) <- colname 16 | #按列进行标准化 17 | #先求样本之间两两相似性 18 | result <- dist(df, method = "euclidean") 19 | #使用指定距离来计算数据矩阵行之间的距离 20 | #euclidean:欧几里得距离 21 | #maximum:最大距离 22 | #manhattan:绝对距离 23 | #canberra:堪培拉距离 24 | #minkowski:闵可夫斯基距离 25 | #产生层次结构 26 | result_hc <- hclust(d = result, method = "ward.D2") 27 | #Ward: 最小方差方法旨在寻找紧凑的球形簇的完整的联动方法找到相似集群。 28 | #有两种不同的算法,"ward.D"(相当于只沃德选择"ward"不执行沃德(1963)聚类准则)& "ward.D2"实现了标准(Murtagh的及Legendre 2014)在集群更新之前对差异进行平方。注意agnes(*, method="ward")对应于hclust(*, "ward.D2"). 29 | #median和centroid在不导致单调的距离测量,或者等效产生的树状图可以具有所谓的倒置或颠倒。 30 | 31 | data2$type <- cutree(result_hc, k=30) 32 | lat_mean <- tapply(data2[,21],data2$type,mean,na.rm = TRUE) 33 | lon_mean <- tapply(data2[,22],data2$type,mean,na.rm = TRUE) 34 | data2$cluster1 <- NA 35 | data2$cluster2 <- NA 36 | for(i in 1:224) { 37 | for(j in 1:30){ 38 | if(data2[i,23] == j ){ 39 | data2[i,24] <- as.numeric(lat_mean[j]) 40 | data2[i,25] <- as.numeric(lon_mean[j]) 41 | } 42 | } 43 | } 44 | write.table(data2,"30_cluster.txt",sep="\t",row.names = F,quote=F) 45 | 46 | library(maps) 47 | mp<-NULL 48 | mapworld<-borders("world",colour = "gray70",fill="gray70") 49 | mp<-ggplot()+mapworld+ylim(-90,90) 50 | mp_40<-mp+geom_point(aes(x=re$Longitude_40, y=re$Latitude_40))+ 51 | scale_size(range=c(1,1))+ 52 | theme_classic() -------------------------------------------------------------------------------- /01_Vmap1.1R/06_Qmatrix_PieMap.r: -------------------------------------------------------------------------------- 1 | #统一设置:注释内容及颜色 2 | library(pheatmap) 3 | require(reshape) 4 | require (rworldmap) 5 | require(rworldxtra) 6 | library(RColorBrewer) 7 | setwd("/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Gene/V5") 8 | annotation_col <- read.table("/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Taxa_Region_225.txt",header=T,stringsAsFactors = F) 9 | rownames(annotation_col) = c(1:325) 10 | seq <- annotation_col[,1] 11 | #cluster samples: 按照算Xp-clr的区域划分 12 | out <- annotation_col[,c(4,5)] 13 | lat_mean <- tapply(out[,1],out$Region_sub2,mean,na.rm = TRUE) 14 | lon_mean <- tapply(out[,2],out$Region_sub2,mean,na.rm = TRUE) 15 | out$cluster1 <- NA 16 | out$cluster2 <- NA 17 | for(i in 1:225) { 18 | for(j in names(lat_mean)){ 19 | if(out[i,3] == j ){clas 20 | out[i,4] <- as.numeric(lat_mean[j]) 21 | out[i,5] <- as.numeric(lon_mean[j]) 22 | } 23 | } 24 | } 25 | mode <- as.data.frame(cbind(annotation_col[,2],as.numeric(out[,4]),as.numeric(out[,5])),stringsAsFactors = F) 26 | mode$V2 <- as.numeric(mode$V2) 27 | mode$V3 <- as.numeric(mode$V3) 28 | #map 29 | path <- "/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Gene/V5/TXT" ##文件目录 30 | fileNames <- dir(path) 
##获取该路径下的文件名 31 | filePath <- sapply(fileNames, function(x){ 32 | paste(path,x,sep='/')}) ##生成读取文件路径 33 | data <- lapply(filePath, function(x){ 34 | read.table(x, header=F,stringsAsFactors = F)}) 35 | pdf("cut_8_piemap.pdf") 36 | for (i in c(1:82)){ 37 | all <- as.matrix(data[[i]]) 38 | colnames(all) <- c(1:361) 39 | all <- all[,seq] 40 | data.e <- dist(t(all)) 41 | model <- hclust(data.e,method = "complete") 42 | #plot(model) 43 | result <- as.numeric(cutree(model, k=8)) 44 | out2 <- as.data.frame(cbind(mode,result)) 45 | out2$value <- 1 46 | colnames(out2)[1:4] <- c("Latitude", "Logititude", "Sub.population.2", "value") 47 | wheat2 = out2[!is.na(out2$Latitude),] 48 | wheat = wheat2[!is.na(wheat2$Sub.population.2),] 49 | wheat_reshape <- cast(test,Latitude+Logititude~Sub.population.2) 50 | wheat_reshape2 <- as.data.frame(wheat_reshape) 51 | mapPies(wheat_reshape2,xlim=c(-120,140),ylim=c(0,40),nameX="Logititude",nameY="Latitude",nameZs=c("1","2","3"),symbolSize=1, 52 | zColours=brewer.pal(8, "Set2"),barOrient='vert',oceanCol="#D1EEEE",landCol="#FFDAB9",main=i) 53 | } 54 | dev.off() 55 | 56 | -------------------------------------------------------------------------------- /01_Vmap1.1R/10_Density.r: -------------------------------------------------------------------------------- 1 | #install.packages('RIdeogram') 2 | require(RIdeogram) 3 | 4 | setwd("/Users/guoyafei/Documents/01_Migration/01_BasicStatistic/13_Plots/01_Density") 5 | #gene_density <- read.table("ArinaLrFor_LTR_1.txt", header=T,stringsAsFactors = F) 6 | gene_density2 <- read.table("gene_density.txt", header=T, stringsAsFactors = F) 7 | gene_density <- read.table("21-1M_VMap3_SnpDensity.txt", header=T, stringsAsFactors = F) 8 | wheat_karyotype <- read.table("wheat_karyotype.txt", header=T, stringsAsFactors = F) 9 | ideogram(karyotype = wheat_karyotype, overlaid = gene_density2) 10 | convertSVG("chromosome.svg", device = "pdf") 11 | 12 | #wheat_karyotype 13 | #Chr Start End CE_start CE_end 14 | #1 0 248956422 122026459 124932724 15 | #2 0 242193529 92188145 94090557 16 | 17 | #gene_density 18 | #Chr Start End Value 19 | #1 1 1000000 65 20 | #1 1000001 2000000 76 21 | 22 | 23 | 24 | library(CMplot) 25 | setwd("/Users/guoyafei/Documents/01_个人项目/02_VmapIII/03_Fastcall2/测试数据") 26 | mydata<-read.table("/Users/guoyafei/Documents/01_个人项目/02_VmapIII/03_Fastcall2/测试数据/fastcall2_001_pos.txt",header=TRUE,sep="\t") 27 | head(mydata) 28 | # snp chr pos 29 | # snp1_1 1 2041 30 | # snp1_2 1 2062 31 | # snp1_3 1 2190 32 | CMplot(mydata,plot.type="d",bin.size=1e4,col=c("darkgreen","yellow", "red"),file="jpg",memo="snp_density",dpi=300) 33 | mydata<-read.table("/Users/guoyafei/Documents/01_个人项目/02_VmapIII/03_Fastcall2/测试数据/fastcall_001_pos.txt",header=TRUE,sep="\t") 34 | head(mydata) 35 | # snp chr pos 36 | # snp1_1 1 2041 37 | # snp1_2 1 2062 38 | # snp1_3 1 2190 39 | CMplot(mydata,plot.type="d",bin.size=1e4,col=c("darkgreen","yellow", "red"),file="jpg",memo="snp_density",dpi=300) 40 | 41 | 42 | #lineage density 43 | A <- read.table("Alineage.txt",header=F,stringsAsFactors = F) 44 | ggplot(A, mapping = aes(x = V4)) + 45 | geom_density( alpha = 0.5, color = "#999999",fill="#999999") + 46 | #geom_density(fill = "blue",alpha = 0.3,color="blue")+ 47 | theme_bw() 48 | 49 | 50 | B <- read.table("Blineage.txt",header=F,stringsAsFactors = F) 51 | ggplot(B, mapping = aes(x = V4)) + 52 | geom_density( alpha = 0.5, color = "#377EB8",fill="#377EB8") + 53 | #geom_density(fill = "blue",alpha = 0.3,color="blue")+ 54 | theme_bw() 55 | 56 | D <- 
read.table("Dlineage.txt",header=F,stringsAsFactors = F) 57 | ggplot(D, mapping = aes(x = V4)) + 58 | geom_density( alpha = 0.5, color = "#FF7F00",fill="#FF7F00") + 59 | #geom_density(fill = "blue",alpha = 0.3,color="blue")+ 60 | theme_bw() 61 | 62 | ggplot(D, mapping = aes(x = V4),color = "blue",fill="blue") + 63 | geom_line(stat = "density") 64 | 65 | 66 | 67 | -------------------------------------------------------------------------------- /01_Vmap1.1R/13_Xp-clr.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | sleep 4800s 3 | for i in {1..42} 4 | do 5 | WGS --model vcf --type toXPCLR --group1 ../groupTaxa/EA.txt --rec ../../slidewindow_recomrate_updown_20M.txt --chr $i --file /data2/xuebo/Projects/Speciation/E6/chr${i}.Land.vcf.gz --out groupEAChr${i} & 6 | done 7 | 8 | #!/bin/bash 9 | for i in {1..42} 10 | do 11 | XPCLR -xpclr ../groupEA/groupEAChr${i}.geno ../groupWA/groupWAChr${i}.geno ../groupWA/groupWAChr${i}.snp EA_WA_10kchr${i} -w1 0.005 500 10000 $i -p1 0.95 & 12 | done 13 | -------------------------------------------------------------------------------- /01_Vmap1.1R/15_PCA.r: -------------------------------------------------------------------------------- 1 | setwd("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/02_Environment/02_XP-CLR/NegativeContral") 2 | library("FactoMineR") 3 | library("ggplot2") 4 | library("factoextra") 5 | Elevation <- read.csv("Elevation.txt",fill=TRUE, row.names = 1,na.strings = "",header = T, sep="\t", stringsAsFactors = F) 6 | Temp <- read.table("Temp.txt", header=T,row.names = 1,stringsAsFactors = F) 7 | Prec <- read.table("Prec.txt",header=T,row.names = 1,stringsAsFactors = F) 8 | bio <- read.table("20bio.txt",header=T,stringsAsFactors = F) 9 | dt <- as.matrix(scale(bio[!duplicated(bio, fromLast=TRUE),])) 10 | rm1<-cor(dt) 11 | rm1 12 | rs1<- eigen(rm1) 13 | #提取结果中的特征值,即各主成分的方差; 14 | val <- rs1$values 15 | 16 | #换算成标准差(Standard deviation); 17 | (Standard_deviation <- sqrt(val)) 18 | #计算方差贡献率和累积贡献率; 19 | (Proportion_of_Variance <- val/sum(val)) 20 | (Cumulative_Proportion <- cumsum(Proportion_of_Variance)) 21 | #碎石图绘制; 22 | par(mar=c(6,6,2,2)) 23 | plot(rs1$values,type="b", 24 | cex=2, 25 | cex.lab=2, 26 | cex.axis=2, 27 | lty=2, 28 | lwd=2, 29 | xlab = "PC", 30 | ylab="Eigenvalue (Principal Component Variance)") 31 | #提取结果中的特征向量(也称为Loadings,载荷矩阵); 32 | (U<-as.matrix(rs1$vectors)) 33 | #进行矩阵乘法,获得PC score; 34 | PC <- dt %*% U 35 | colnames(PC) <- paste("PC",1:20,sep="") 36 | head(PC) 37 | 38 | #将iris数据集的第5列数据合并进来; 39 | #df<-data.frame(PC, dt_all$ploidy, dt_all$Region) 40 | #head(df) 41 | 42 | library(ggplot2) 43 | #提取主成分的方差贡献率,生成坐标轴标题; 44 | xlab<-paste0("PC1(",round(Proportion_of_Variance[1]*100,2),"%)") 45 | ylab<-paste0("PC2(",round(Proportion_of_Variance[2]*100,2),"%)") 46 | #绘制散点图并添加置信椭圆; 47 | p1<-ggplot(data = df,aes(x=PC1,y=PC2,color=df$dt_all.ploidy))+ 48 | stat_ellipse(aes(fill=df$dt_all.ploidy), 49 | type ="norm", geom ="polygon",alpha=0.2,color=NA)+ 50 | geom_point()+labs(x=xlab,y=ylab,color="")+ 51 | guides(fill=F)+ 52 | theme_classic() 53 | p2<-ggplot(data = PC,aes(x=PC1,y=PC2))+ 54 | geom_point()+labs(x=xlab,y=ylab,color="")+ 55 | guides(fill=F)+ 56 | theme_classic() 57 | p1 58 | p2 59 | 60 | #------ 61 | dt_all$lat_bin <- NA 62 | dt_all[which(dt_all$lat >= -45 & dt_all$lat < -35),24] <- "[-45,-35)" 63 | dt_all[which(dt_all$lat >= -35 & dt_all$lat < -25),24] <- "[-35,-25)" 64 | dt_all[which(dt_all$lat >= -25 & dt_all$lat < -15),24] <- "[-25,-15)" 65 | 
dt_all[which(dt_all$lat >= -15 & dt_all$lat < -5),24] <- "[-15,-5)" 66 | dt_all[which(dt_all$lat >= -5 & dt_all$lat < 5),24] <- "[-5,5)" 67 | dt_all[which(dt_all$lat >= 5 & dt_all$lat < 15),24] <- "[5,15)" 68 | dt_all[which(dt_all$lat >= 15 & dt_all$lat < 25),24] <- "[15,25)" 69 | dt_all[which(dt_all$lat >= 25 & dt_all$lat < 35),24] <- "[25,35)" 70 | dt_all[which(dt_all$lat >= 35 & dt_all$lat < 45),24] <- "[35,45)" 71 | dt_all[which(dt_all$lat >= 45 & dt_all$lat < 55),24] <- "[45,55)" 72 | dt_all[which(dt_all$lat >= 55 & dt_all$lat < 65),24] <- "[55,65)" 73 | dt_all[which(dt_all$lat >= 65 & dt_all$lat < 75),24] <- "[65,75)" 74 | df$lat_bin <- dt_all$lat_bin 75 | p2<-ggplot(data = df,aes(x=PC1,y=PC2,color=df$lat_bin))+ 76 | stat_ellipse(aes(fill=df$dt_all.Region), 77 | type ="norm", geom ="polygon",alpha=0.2,color=NA)+ 78 | geom_point()+labs(x=xlab,y=ylab,color="")+ 79 | guides(fill=F)+ 80 | theme_classic() 81 | p1 82 | 83 | -------------------------------------------------------------------------------- /01_Vmap1.1R/15_PCA_climate.r: -------------------------------------------------------------------------------- 1 | #working directory 2 | #/Users/guoyafei/Documents/01_个人项目/02_Migration/02_数据表格/01_Vmap1-1/01_Add_ZNdata/05_Environment/XP-CLR/NegativeContral 3 | library(RColorBrewer) 4 | library(ggmap) 5 | setwd("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/02_Environment/02_XP-CLR/NegativeContral") 6 | location <- read.table("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/01_BasicStatistic/01_IBS/04_795taxaIBS/795_Location.txt",header=T,stringsAsFactors = F) 7 | 8 | Elevation <- read.table("Elevation.txt",header=T,stringsAsFactors = F) 9 | ele <- merge(Elevation,location,by="ID") 10 | colnames(ele)[2] <-"PC1" 11 | 12 | Temp <- read.table("temp_PC.txt",header=T,stringsAsFactors = F) 13 | Temp$ID <- rownames(Temp) 14 | tem <- merge(Temp,location,by="ID") 15 | 16 | Prec <- read.table("Prec_PC.txt",header=T,stringsAsFactors = F) 17 | Prec$ID <- rownames(Prec) 18 | pre <- merge(Prec,location,by="ID") 19 | 20 | bio <- read.table("20bio_PC.txt",header=T,row.names = 1) 21 | #bio$ID <- rownames(bio) 22 | #bio <- merge(bio,location,by="ID") 23 | 24 | data <- list(ele,tem,pre) 25 | pdf("20bio_PC1.pdf",width = 12,height = 7.5) 26 | for(i in c(1:2)){ 27 | mp <- NULL 28 | mapworld <- borders("world",colour = "grey90",fill="white") 29 | mp <- ggplot()+mapworld+ylim(-60,90)+xlim(-25,200) 30 | mp2 <- mp+geom_point(aes(x=bio$Logititude,y=bio$Latitude,color=bio$PC1),size=2.6)+scale_size(range=c(1,1))+ 31 | scale_colour_gradientn(colours = brewer.pal(11, "RdYlBu"))+ 32 | theme(legend.title=element_blank(),axis.text.x = element_text(size = 25), axis.title.x = element_text(size = 25),axis.text.y = element_text(size = 25),axis.title.y = element_text(size = 25))+ 33 | theme(panel.grid = element_blank()) + 34 | #theme(panel.grid =element_blank()) + ## 删去网格线 35 | #theme(axis.text = element_blank()) + ## 删去刻度标签 36 | #theme(axis.ticks = element_blank()) + ## 删去刻度线 37 | theme(panel.border = element_blank()) 38 | print(mp2) 39 | } 40 | dev.off() 41 | 42 | 43 | #看筛选样本的分布位点 44 | #taxa <- read.table("group_V2/max_temp.txt",header=F,stringsAsFactors = F) 45 | #colnames(taxa)[1] <-"ID" 46 | #max <- merge(taxa,location,by="ID") 47 | 48 | -------------------------------------------------------------------------------- /01_Vmap1.1R/16_Barcode.r: -------------------------------------------------------------------------------- 1 | setwd("/Users/guoyafei/Desktop/") 2 | data <- 
read.table("Barcode",header=F,stringsAsFactors = F) 3 | data <- as.data.frame.matrix(data) 4 | mylist <- vector( mode = "list" ) 5 | for(i in 1:108){ 6 | if(length(strsplit(data[i,1],"")[[1]])==4){ 7 | for(j in 1:2){ 8 | print(j) 9 | print(substr(data[i,1], j, j+2)) 10 | }}else if(length(strsplit(data[i,1],"")[[1]])==5){ 11 | for(j in 1:2){ 12 | print(j) 13 | print(substr(data[i,1], j, j+3)) 14 | } 15 | } else if(length(strsplit(data[i,1],"")[[1]])==6){ 16 | for(j in 1:2){ 17 | print(j) 18 | print(substr(data[i,1], j, j+4)) 19 | }} else { 20 | for(j in 1:2){ 21 | print(j) 22 | print(substr(data[i,1], j, j+5)) 23 | } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /01_Vmap1.1R/17_CreateHmp.r: -------------------------------------------------------------------------------- 1 | rs# alleles chrom pos strand assembly# center protLSID assayLSID panelLSID QCcode 2 | data <- read.table("output.txt",header=T,stringsAsFactors = F) 3 | pos <- read.table("WorldFilterd.pos",header=T,stringsAsFactors = F) 4 | colnames(data)[4] <- "rs#" 5 | colnames(data)[5] <- "strand" 6 | data[,5] <- NA 7 | colnames(data)[6] <- "chrom" 8 | colnames(data)[7] <- "pos" 9 | colnames(data)[1] <- "assembly#" 10 | colnames(data)[2] <- "center" 11 | colnames(data)[3] <- "protLSID" 12 | colnames(data)[8] <- "assayLSID" 13 | 14 | alleles <- paste(pos[,2],pos[,3],sep="/") 15 | data$alleles <- alleles 16 | data$panelLSID <- NA 17 | data$QCcode <- NA 18 | data[,1] <- NA 19 | data[,2] <- NA 20 | data[,3] <- NA 21 | data[,8] <- NA 22 | all <- data[,c(4,755,6,7,5,1,2,3,8,756,757,9:754)] 23 | all2 <- all[which(all$chrom != "NA"),] 24 | write.table(all2,"World.hmp.txt",quote=F,row.names = F,sep="\t") 25 | -------------------------------------------------------------------------------- /01_Vmap1.1R/18_Daf.r: -------------------------------------------------------------------------------- 1 | setwd("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/01_BasicStatistic/07_Daf") 2 | A <- read.table("A_daf",header=F,stringsAsFactors = F) 3 | B <- read.table("B_daf",header=F,stringsAsFactors = F) 4 | D <- read.table("D_daf",header=F,stringsAsFactors = F) 5 | library("ggplot2") 6 | ggplot(all, aes(x=V1,fill=lineage)) + 7 | facet_grid(lineage ~ .)+ 8 | geom_histogram(aes(y=..density..),binwidth=0.02, alpha=.7)+ 9 | theme_classic()+ 10 | theme(axis.text.x=element_text(size=200), 11 | axis.text.y=element_text(size=300,face="plain"), 12 | axis.title.y=element_text(size = 300,face="plain")) 13 | -------------------------------------------------------------------------------- /01_Vmap1.1R/21_LDjump.r: -------------------------------------------------------------------------------- 1 | require(LDJump) 2 | results = LDJump("/data1/home/yafei/LDJump-test/HatLandscapeN16Len1000000Nrhs15_th0.01_540_1.fa", alpha = 0.05, segLength = 1000, pathLDhat = "/data1/home/yafei/Software/LDhat-master/", pathPhi = "/data1/home/yafei/Software/PhiPack/Phi", format = "fasta", refName = NULL) 3 | postscript("Results.eps", horiz = F) 4 | plot(results[[1]], xlab = "Segments", ylab = "Estimated Recombination Rate", main = "Estimated recombination map with LDJump") 5 | dev.off() -------------------------------------------------------------------------------- /01_Vmap1.1R/22_Maf.r: -------------------------------------------------------------------------------- 1 | #MAF-frequence 2 | setwd("//Users/guoyafei/Downloads/Results/Impute/") 3 | input <- read.table("freq_stat_input.frq", header=TRUE,stringsAsFactors = F) 4 
| output <- read.table("freq_stat.frq",header=TRUE,stringsAsFactors = F) 5 | xlab <- c("0.05-0.1","0.10-0.15","0.15-0.20","0.20-0.25","0.25-0.30","0.30-0.35","0.35-0.40","0.40-0.45","0.45-0.50") 6 | a <- c(40081,26663,39421,45852,45121,34408,21774,8301,4469) 7 | b <- a/266090 8 | 9 | > length(which(output[,5] >= 0.05 &output[,5] < 0.1)) 10 | [1] 40081 11 | > length(which(output[,5] >= 0.1 &output[,5] < 0.15)) 12 | [1] 26663 13 | > length(which(output[,5] >= 0.15 &output[,5] < 0.2)) 14 | [1] 39421 15 | > length(which(output[,5] >= 0.2 &output[,5] < 0.25)) 16 | [1] 45852 17 | > length(which(output[,5] >= 0.25 &output[,5] < 0.3)) 18 | [1] 45121 19 | > length(which(output[,5] >= 0.3 &output[,5] < 0.35)) 20 | [1] 34408 21 | > length(which(output[,5] >= 0.35 &output[,5] < 0.4)) 22 | [1] 21774 23 | > length(which(output[,5] >= 0.4 &output[,5] < 0.45)) 24 | 8301 25 | > length(which(output[,5] >= 0.45 &output[,5] < 0.5)) 26 | 4469 27 | plot(b,ylim=c(0,0.5),xlim=c(1,9),cex=3) 28 | 29 | xlab2 <- c("0.00-0.01","0.01-0.02","0.02-0.03","0.03-0.04","0.04-0.05") 30 | length(which(output[,5] > 0.0 &output[,5] < 0.01)) 31 | 1631338 32 | length(which(output[,5] >= 0.01 &output[,5] < 0.02)) 33 | 292504 34 | length(which(output[,5] >= 0.02 &output[,5] < 0.03)) 35 | 119416 36 | length(which(output[,5] >= 0.03 &output[,5] < 0.04)) 37 | 45907 38 | length(which(output[,5] >= 0.04 &output[,5] < 0.05)) 39 | 21405 40 | a <- c(1631338,292504,119416,45907,21405) 41 | b <- a/2110570 42 | plot(b,ylim=c(0,1),xlim=c(1,5),cex=3) 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /01_Vmap1.1R/24_MergeTest.r: -------------------------------------------------------------------------------- 1 | setwd("/Users/guoyafei/Downloads/Results/ChangeFormat/test_merge") 2 | test.Exome <- read.table("test.Exome.vcf", head = FALSE, stringsAsFactors = FALSE) 3 | test.Trancing <- read.table("test.Trancing.vcf", head = FALSE, stringsAsFactors = FALSE) 4 | test.World <- read.table("test.World.vcf", head = FALSE, stringsAsFactors = FALSE) 5 | test.merge <- read.table("test.merge.vcf", head = FALSE, stringsAsFactors = FALSE) 6 | 7 | a <- paste(test.Trancing[,1],test.Trancing[,2],sep="_") 8 | b <- paste(test.World[,1],test.World[,2],sep="_") 9 | c <- paste(test.merge[,1],test.merge[,2],sep="_") 10 | d <- paste(test.Exome[,1],test.Exome[,2],sep="_") 11 | venn.diagram(x= list(FileT = a, FileE= d,FileW = b,Merge = c), 12 | filename = "merge.png", height = 450, width = 450,resolution =300, 13 | imagetype="png", col ="transparent", 14 | fill =c("cornflowerblue","green","yellow","darkorchid1"), 15 | alpha = 0.5, 16 | label.col = c("orange", "white","darkorchid4", "white", "white", "white", "white", "white","darkblue", "white", "white", "white","white", "darkgreen", "white"), 17 | cex = 0.45,fontfamily = "serif", fontface = "bold", 18 | cat.col =c("darkblue", "darkgreen", "orange","darkorchid4"), 19 | cat.cex = 0.45,cat.pos = 0, cat.dist = 0.07,cat.fontfamily = "serif", 20 | rotation.degree = 270) 21 | 22 | ab <- a[a%in%b] 23 | ad <- a[a%in%d] 24 | abd <- d[d%in%ab] 25 | a[-which(a%in%c)] -------------------------------------------------------------------------------- /01_Vmap1.1R/25_MergeVCF.r: -------------------------------------------------------------------------------- 1 | setwd("/Users/guoyafei/Downloads/Results/") 2 | W.pos <- read.table("World.pos",header=T,stringsAsFactors = F) 3 | B.pos <- read.table("sorted.pos",header=F,stringsAsFactors = F) 4 | colnames(B.pos) <- c("psid", "alle1", 
"alle2") 5 | World <- merge(W.pos, B.pos, by="psId",all.x=TRUE) 6 | rm(W.pos) 7 | 8 | Trancing <- read.table("Trancing.SNPs.pos",header = T,stringsAsFactors = F) 9 | Exome <- read.table("Exome.pos",header=F,stringsAsFactors = F) 10 | 11 | W <- paste(World[,2],World[,3],sep="") 12 | T <- paste(Trancing[,1],Trancing[,2],sep="") 13 | E <- paste(Exome[,1],Exome[,2],sep="") 14 | 15 | newW <- cbind(W,World[,1],World[,4],World[,5]) 16 | newW <- as.data.frame(newW) 17 | colnames(newW) <- c("chr","psid","alleWA","alleWB") 18 | newE <- cbind(E,Exome[,3],Exome[,4]) 19 | newE <- as.data.frame(newE) 20 | colnames(newE) <- c("chr","alleEA","alleEB") 21 | newT <- cbind(T,Trancing[,3],Trancing[,4]) 22 | newT <- as.data.frame(newT) 23 | colnames(newT) <- c("chr","alleTA","alleTB") 24 | rm(Exome) 25 | rm(Trancing) 26 | 27 | newWT <- merge(newW,newT,by="chr",all.x=TRUE) 28 | newWTE <- merge(newWT, newE,by="chr",all.x=TRUE) 29 | 30 | index <- apply(newWTE,1,function(x) length(which(is.na(x)))) 31 | new <- newWTE[which(index < 4),] 32 | G <- apply(new,1,function(x) length(which(x=="G"))) 33 | A <- apply(new,1,function(x) length(which(x=="A"))) 34 | T <- apply(new,1,function(x) length(which(x=="T"))) 35 | C <- apply(new,1,function(x) length(which(x=="C"))) 36 | 37 | new$G <- G 38 | new$A <- A 39 | new$T <- T 40 | new$C <- C 41 | 42 | delete <- new[apply(new,1,function(x) length(which(x==0))!=2),] 43 | select <- new[apply(new,1,function(x) length(which(x==0))==2),] 44 | 45 | for (i in 1:14089) { 46 | if (is.na(select[i,5])){ 47 | select[i,5] <- select[i,7] 48 | select[i,6] <- select[i,8] 49 | } 50 | } 51 | 52 | rm(A) 53 | rm(C) 54 | rm(E) 55 | rm(G) 56 | rm(i) 57 | rm(index) 58 | rm(T) 59 | rm(W) 60 | rm(new) 61 | rm(newE) 62 | rm(newT) 63 | rm(newW) 64 | rm(newWT) 65 | rm(newWTE) 66 | rm(World) 67 | 68 | newData <- select[,c(1,4,5)] 69 | 70 | WorldwideResult <- read.table("WorldwideResult",header=T,stringsAsFactors = F) 71 | 72 | deleteID <- as.character(delete[,2]) 73 | all <- WorldwideResult[,1] 74 | B.ID <- B.pos[,1] 75 | 76 | num1 <- which(all %in% deleteID) 77 | WorldFilterd <- WorldwideResult[-num1,] 78 | 79 | num2 <- which(B.ID %in% deleteID) 80 | Bpos.filted <- B.pos[-num2,] 81 | 82 | 83 | pos.merge <- merge(Bpos.filted, select, by="psid",all.x=TRUE) 84 | pos.merge[,7] <- as.character(pos.merge[,7]) 85 | pos.merge[,8] <- as.character(pos.merge[,8]) 86 | 87 | 88 | for (i in 1:102690) { 89 | if (!is.na(pos.merge[i,7])){ 90 | pos.merge[i,2] <- pos.merge[i,7] 91 | pos.merge[i,3] <- pos.merge[i,8] 92 | } 93 | } 94 | 95 | pos <- pos.merge[,c(1,2,3)] 96 | 97 | write.table(WorldFilterd, "WorldFilterdResult",quote = F,row.names = F) 98 | write.table(pos, "WorldFilterd.pos",quote = F,row.names = F) 99 | 100 | AA <- paste(pos[,2],pos[,2],sep="") 101 | AB <- paste(pos[,2],pos[,3],sep="") 102 | BB <- paste(pos[,3],pos[,3],sep="") 103 | 104 | all <- cbind(AA,AB,BB,WorldFilterd) 105 | write.table(all,file = "all.txt",quote = F,sep=" ",row.names = F) 106 | -------------------------------------------------------------------------------- /01_Vmap1.1R/27_Permut.r: -------------------------------------------------------------------------------- 1 | set.seed(98765) 2 | N <- 100 3 | r.mat <- matrix(rexp(N^2), ncol=N) 4 | cv.mat <- crossprod(r.mat) # a positive definite covariance matrix 5 | x.mat <- rep(1, N) 6 | r.mean <- 3 7 | r1 <- rnorm(N) # independent random noise 8 | r.data <- x.mat * r.mean + drop( t(chol(cv.mat)) %*% r1 ) # correlated random noise 9 | r.perm <- mvnpermute( r.data, x.mat, cv.mat, nr=1000, seed=1234) 
10 | est.cvm <- cov(t(r.perm)) 11 | plot(cv.mat, est.cvm); 12 | abline(0,1) 13 | -------------------------------------------------------------------------------- /01_Vmap1.1R/28_Pluk.r: -------------------------------------------------------------------------------- 1 | library(vegan) 2 | setwd("/Users/guoyafei/Downloads/200328-普鲁克分析(Procrustes Analysis)评估物种-环境或功能关联度的一个示例") 3 | #PCA降维及探索性分析 4 | ##样方-环境属性矩阵 5 | env <- read.delim('env_table.txt', row.names = 1, sep = '\t', stringsAsFactors = FALSE, check.names = FALSE) 6 | 7 | #环境变量的 PCA 需要标准化,详情 ?rda 8 | env_pca <- rda(env, scale = TRUE) 9 | 10 | ##样方-物种丰度矩阵 11 | otu <- read.delim('spe_table.txt', row.names = 1, sep = '\t', stringsAsFactors = FALSE, check.names = FALSE) 12 | 13 | #物种数据 Hellinger 预转化 14 | otu_hel <- decostand(otu, method = 'hellinger') 15 | 16 | #对转化后的物种数据执行 PCA,无需标准化 17 | otu_pca <- rda(otu_hel, scale = FALSE) 18 | 19 | ##排序图比较,以 PCA 的 I 型标尺为例 20 | par(mfrow = c(1, 2)) 21 | biplot(env_pca, choices = c(1, 2), scaling = 1, 22 | main = '环境组成的PCA', col = c('red', 'blue')) 23 | biplot(otu_pca, choices = c(1, 2), scaling = 1, 24 | main = '物种组成的PCA', col = c('red', 'blue')) 25 | #Procrustes分析度量两数据集一致性 26 | #Procrustes 分析 27 | #提取两个 PCA 中的样方排序坐标,均以 I 型标尺为例 28 | site_env <- summary(otu_pca, scaling = 1)$site 29 | site_otu <- summary(otu_pca, scaling = 1)$site 30 | 31 | #执行 Procrustes 分析,详情 ?procrustes 32 | #以对称分析为例(symmetric = TRUE) 33 | proc <- procrustes(X = env_pca, Y = otu_pca, symmetric = TRUE) 34 | summary(proc) 35 | 36 | #旋转图 37 | plot(proc, kind = 1, type = 'text') 38 | 39 | #一些重要的结果提取 40 | names(proc) 41 | head(proc$Yrot) #Procrustes 分析后 Y 的坐标 42 | head(proc$X) #Procrustes 分析后 X 的坐标 43 | proc$ss #偏差平方和 M2 统计量 44 | proc$rotation #通过该值可获得旋转轴的坐标位置 45 | #残差图 46 | plot(proc, kind = 2) 47 | residuals(proc) #残差值 48 | #PROTEST 检验,详情 ?protest 49 | #以 999 次置换为例 50 | #注:protest() 中执行的是对称 Procrustes 分析,X 和 Y 的分配调换不影响 M2 统计量的计算 51 | set.seed(123) 52 | prot <- protest(X = env_pca, Y = otu_pca, permutations = how(nperm = 999)) 53 | prot 54 | 55 | #重要统计量的提取 56 | names(prot) 57 | prot$signif #p 值 58 | prot$ss #偏差平方和 M2 统计量 59 | library(ggplot2) 60 | 61 | #提取 Procrustes 分析的坐标 62 | Y <- cbind(data.frame(proc$Yrot), data.frame(proc$X)) 63 | X <- data.frame(proc$rotation) 64 | 65 | #添加分组信息 66 | group <- read.delim('group.txt', sep = '\t', stringsAsFactors = FALSE, check.names = FALSE) 67 | Y$samples <- rownames(Y) 68 | Y <- merge(Y, group, by = 'samples') 69 | #---------------------------------------------------- 70 | #ggplot2 作图 71 | p <- ggplot(Y) + 72 | geom_point(aes(X1, X2, color = groups), size = 1.5, shape = 16) + 73 | geom_point(aes(PC1, PC2, color = groups), size = 1.5, shape = 1) + 74 | scale_color_manual(values = c('red2', 'purple2', 'green3'), limits = c('A', 'B', 'C')) + 75 | geom_segment(aes(x = X1, y = X2, xend = PC1, yend = PC2), arrow = arrow(length = unit(0.1, 'cm')), 76 | color = 'blue', size = 0.3) + 77 | theme(panel.grid = element_blank(), panel.background = element_rect(color = 'black', fill = 'transparent'), 78 | legend.key = element_rect(fill = 'transparent')) + 79 | labs(x = 'Dimension 1', y = 'Dimension 2', color = '') + 80 | geom_vline(xintercept = 0, color = 'gray', linetype = 2, size = 0.3) + 81 | geom_hline(yintercept = 0, color = 'gray', linetype = 2, size = 0.3) + 82 | geom_abline(intercept = 0, slope = X[1,2]/X[1,1], size = 0.3) + 83 | geom_abline(intercept = 0, slope = X[2,2]/X[2,1], size = 0.3) + 84 | annotate('text', label = sprintf('M^2 == 0.2178'), 85 | x = -0.21, y = 0.42, size = 3, parse = TRUE) + 86 | 
annotate('text', label = 'P < 0.001', 87 | x = -0.21, y = 0.38, size = 3, parse = TRUE) 88 | 89 | p 90 | 91 | #输出图片 92 | ggsave('procrustes.pdf', p, width = 6, height = 5) 93 | ggsave('procrustes.png', p, width = 6, height = 5) -------------------------------------------------------------------------------- /01_Vmap1.1R/29_Statistic.r: -------------------------------------------------------------------------------- 1 | setwd("/Users/guoyafei/Documents/lab/EGWAS/forYafei/") 2 | #data <- read.csv("R1_count_statics.txt",header=T,stringsAsFactors = F,sep="\t") 3 | #data <- read.csv("bam_size.txt",header=T,stringsAsFactors = F,sep="\t") 4 | data <- read.csv("pileup_statics.txt",header=T,stringsAsFactors = F,sep="\t") 5 | library(ggplot2) 6 | #ggplot(data,aes(Reads.count))+geom_histogram(binwidth=0.01) 7 | 8 | data$Taxon.name.5700 <- as.factor(data$Taxon.name.5700) 9 | data$Flowcell.ID <- as.factor(data$Flowcell.ID) 10 | data$Lane <- as.factor(data$Lane) 11 | data$Library.ID <- as.factor(data$Library.ID) 12 | data$Company <- as.factor(data$Company) 13 | 14 | cbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7") 15 | #直方图 16 | 17 | facet_grid(Company~.)+ 18 | scale_color_manual(values=cbPalette) 19 | 20 | p+labs(x = "log(Reads count)", y = "Individual count", title = "Reads count at SNP sites in 5700 Individuals")+ 21 | theme(axis.text.x = element_text(size = 16),axis.text.y = element_text(size = 16),plot.title = element_text(hjust = 0.5,vjust=0.5,size=20),axis.title.x = element_text(size = 17),axis.title.y = element_text(size = 17))+ 22 | theme(panel.border = element_blank(),panel.grid.major = element_blank(),panel.grid.minor = element_blank(),axis.line = element_line(colour = "black"))+ 23 | geom_vline(aes(xintercept=2.4548), colour="#990000", linetype="dashed")+ 24 | geom_vline(aes(xintercept=2.7559), colour="#E69F00", linetype="dashed") 25 | 26 | #箱线图 27 | pbox <- ggplot(data,aes(Company,Sequencing_error,color=Company))+ 28 | geom_boxplot(outlier.colour="red", outlier.shape=8, outlier.size=4)+ 29 | scale_color_manual(values=cbPalette)+ 30 | geom_jitter(shape=16, position=position_jitter(0.2))+ 31 | labs(title = "Error Frrequency Distribution of 124 Libraries")+ 32 | theme(axis.text.x = element_text(size = 16),axis.text.y = element_text(size = 16),plot.title = element_text(hjust = 0.5,vjust=0.5,size=20),axis.title.x = element_text(size = 17),axis.title.y = element_text(size = 17),legend.text = element_text(size = 13))+ 33 | theme(panel.border = element_blank(),panel.grid.major = element_blank(),panel.grid.minor = element_blank(),axis.line = element_line(colour = "black")) 34 | 35 | #多因素方差分析 36 | Result<-aov(Reads.count~Library.ID+Company+Library.ID:Company,data=data) 37 | anova(Result) 38 | interaction.plot(data$Company,data$Library.ID,data$log.bam_Mb.,type="b",legend =F,xlab="Company",ylab="log bamfile size(Mb)",main="The bamfile size(Mb) Distribution of 11,976 Individuals",cex.lab=1.5, cex.axis=1.5, cex.main=1.7, cex.sub=1.5) 39 | #单因素方差分析 40 | OneWay<-aov(Reads.count~Company,data=my) 41 | anova(OneWay) 42 | summary(OneWay) 43 | 44 | library("gplots") 45 | plotmeans(log.Reads.count~Company,data=data,p=0.95,use.t=TRUE) 46 | 47 | 48 | -------------------------------------------------------------------------------- /01_Vmap1.1R/30_Wenn.r: -------------------------------------------------------------------------------- 1 | **统计每个文件里的未分离位点 2 | setwd("/data1/home/yafei/Project3/Download") 3 | EC <- read.table("new_EC",header=T,stringsAsFactors=F) 4 
| TR <- read.table("new_TR",header=T,stringsAsFactors=F) 5 | WW <- read.table("new_WW",header=T,stringsAsFactors=F) 6 | 7 | bing <- read.table("/data1/home/yafei/Project3/Vmap1.1/out.txt",header=F,stringsAsFactors=F) 8 | EC_id <- EC[,3] 9 | WW_id <- WW[,3] 10 | TR_id <- TR[,3] 11 | 12 | noSep_EC <- read.table("EC_noSep.txt",header=F,stringsAsFactors=F) 13 | noSep_TR <- read.table("TR_noSep.txt",header=F,stringsAsFactors=F) 14 | noSep_WW <- read.table("WW_noSep.txt",header=F,stringsAsFactors=F) 15 | 16 | #1. 算各文件的分离位点 17 | EC_sep <- EC_id[-which(EC_id %in% noSep_EC[,1])] 18 | WW_sep <- WW_id[-which(WW_id %in% noSep_WW[,1])] 19 | TR_sep <- TR_id[-which(TR_id %in% noSep_TR[,1])] 20 | #2. 求其他文件的分离位点 21 | TR_WW_V <- c(TR_sep,WW_sep,bing[,1]) 22 | EC_WW_V <- c(EC_sep,WW_sep,bing[,1]) 23 | EC_TR_V <- c(EC_sep,TR_sep,bing[,1]) 24 | #3. 求实际各文件的非分离位点 25 | noSep_real_EC <- noSep_EC[-which(noSep_EC[,1] %in% TR_WW_V),1] 26 | noSep_real_TR <- noSep_TR[-which(noSep_TR[,1] %in% EC_WW_V),1] 27 | noSep_real_WW <- noSep_WW[-which(noSep_WW[,1] %in% EC_TR_V),1] 28 | #4. 求实际各文件的分离位点 29 | Sep_real_EC <- EC_id[-which(EC_id %in% noSep_real_EC)] 30 | Sep_real_TR <- TR_id[-which(TR_id %in% noSep_real_TR)] 31 | Sep_real_WW <- WW_id[-which(WW_id %in% noSep_real_WW)] 32 | #画韦恩图 33 | #三个文件 34 | Length_EC<-length(Sep_real_EC) 35 | Length_TR<-length(Sep_real_TR) 36 | Length_WW<-length(Sep_real_WW) 37 | Length_EC_TR<-length(intersect(Sep_real_EC,Sep_real_TR)) 38 | Length_WW_TR<-length(intersect(Sep_real_WW,Sep_real_TR)) 39 | Length_EC_WW<-length(intersect(Sep_real_EC,Sep_real_WW)) 40 | library(VennDiagram) 41 | T<-venn.diagram(list(EC_86=Sep_real_EC,TR_28=Sep_real_TR,WW_741=Sep_real_WW),filename=NULL,lwd=1,lty=2,col=c('red','green','blue'),fill=c('red','green','blue'),cat.col=c('red','green','blue'),reverse=TRUE) 42 | pdf("Venn.pdf") 43 | grid.draw(T) 44 | dev.off() 45 | #两个文件 46 | pdf("Venn2.pdf") 47 | draw.pairwise.venn(area1=Length_A,area2=Length_B,cross.area=Length_AB,category=c('A','B'),lwd=rep(1,1),lty=rep(2,2),col=c('red','green'),fill=c('red','green'),cat.col=c('red','green'),rotation.degree=90) 48 | dev.off() 49 | 50 | write.table(Sep_real_EC,"Sep_real_EC", quote=F,row.names=F) 51 | write.table(Sep_real_TR,"Sep_real_TR", quote=F,row.names=F) 52 | write.table(Sep_real_WW,"Sep_real_WW", quote=F,row.names=F) 53 | 54 | #输出hapscanner的输入文件:pos.txt & posAllele.txt 55 | Real_WW <- read.table("Sep_real_WW", header=F,stringsAsFactors=F) 56 | selec <- WW[which(WW$ID %in% Real_WW[,1]),] 57 | posAllele <- selec[,c(1,2,4,5)] 58 | colnames(posAllele) <- c("Chr","Pos","Ref","Alt") 59 | pos <- selec[,c(1,2)] 60 | colnames(pos) <- c("Chr","Pos") 61 | 62 | for(i in 1:42){ 63 | a <- posAllele[which(posAllele$Chr == i),] 64 | fileA <- paste("/data1/home/yafei/Project3/Download/Hapscanner/chr",i,"_posAllele.txt",sep="") 65 | write.table(a,fileA,row.names=F,quote=F,sep="\t") 66 | b <- pos[which(pos$Chr == i),] 67 | fileB <- paste("/data1/home/yafei/Project3/Download/Hapscanner/chr",i,"_pos.txt",sep="") 68 | write.table(b,fileB,row.names=F,quote=F,sep="\t") 69 | } 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /01_Vmap1.1R/31_WheatMap.r: -------------------------------------------------------------------------------- 1 | data <- read.table("Down+Vmap.txt",header=T,stringsAsFactors=F) 2 | library(ggmap) 3 | library(ggplot2) 4 | library(sp) 5 | library(maptools) 6 | library(maps) 7 | register_google(key = "AIzaSyAFnTwa7kEJAqIeHU-Kw6qPZiPUU-zXCLI") 8 | 
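# NOTE (added sketch, not part of the original script): `caseAll` is used below
# but never constructed in this file. Presumably it is the accession table
# aggregated per collection site with a `count` column; a hypothetical sketch,
# assuming Down+Vmap.txt carries Logititude/Latitude columns:
caseAll <- aggregate(list(count = rep(1, nrow(data))),
                     by = list(Logititude = data$Logititude, Latitude = data$Latitude),
                     FUN = sum)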
visit.x<-as.numeric(caseAll$Logititude) 9 | visit.y<-as.numeric(caseAll$Latitude) 10 | mp<-NULL #定义一个空的地图 11 | mapworld<-borders("world",colour = "gray50",fill="white") #绘制基本地图 12 | mp<-ggplot()+mapworld+ylim(-60,90) 13 | #利用ggplot呈现,同时地图纵坐标范围从-60到90 14 | mp2<-mp+geom_point(aes(x=visit.x,y=visit.y, size=caseAll$count),color="darkorange")+scale_size(range=c(1,1)) 15 | #绘制带点的地图,geom_point是在地图上绘制点,x轴为经度信息,y轴为纬度信息,size是将点的大小按照收集的个数确定,color为暗桔色,scale_size是将点变大一些 16 | mp3<-mp2+labs(x="Longitude", y="Latitude")+ 17 | theme(legend.position = "none",panel.background = element_blank(),axis.title.x = element_text(size = 25),axis.title.y = element_text(size = 25),axis.text.x = element_text(size = 25),axis.text.y = element_text(size = 25),panel.border = element_blank()) #将图例去掉 18 | pdf("castAll.pdf",width=20,height=8) 19 | mp3 20 | dev.off() 21 | 22 | 23 | ###画世界地图### 24 | data <- read.table("/Users/guoyafei/Desktop/dataset_accessions_WGS.txt",header=T,stringsAsFactors = F) 25 | library(maps) 26 | data$color <- NA 27 | #map1 28 | data[which(data$Region == "AF"), 11 ] <- "darkblue" 29 | data[which(data$Region == "AM"), 11 ] <- "brown" 30 | data[which(data$Region == "EA"), 11 ] <- "purple" 31 | data[which(data$Region == "EU"), 11 ] <- "yellow" 32 | data[which(data$Region == "SCA"), 11 ] <- "red" 33 | data[which(data$Region == "WA"), 11 ] <- "green" 34 | pdf(file = "map1.pdf",width=14,height=7) 35 | map(database="world",fill = T,col = "grey",interior = F,border=NA) 36 | points(x = data$Logititude, y = data$Latitude, col =data$color, pch=20,cex=0.5) 37 | legend("right",legend = c("AF","AM","EA","EU","SCA","WA"),col = c("darkblue","brown","purple","yellow","red","green",rgb(0,255/255,255/255),rgb(255/255,0,255/255)),pch=20,cex=1,box.lty=0) 38 | dev.off() 39 | #map2 40 | data[which(data$Genome == "SS"), 12 ] <- "green" 41 | data[which(data$Genome == "AA"), 12 ] <- "brown" 42 | data[which(data$Genome == "DD"), 12 ] <- "purple" 43 | data[which(data$Genome == "AABB"), 12 ] <- "yellow" 44 | data[which(data$Genome == "AABBDD"), 12 ] <- "red" 45 | pdf(file = "map2.pdf",width=14,height=7) 46 | map(database="world",fill = T,col = "grey",interior = F,border=NA) 47 | points(x = data$Logititude, y = data$Latitude, col =data$color, pch=20,cex=0.5) 48 | legend("right",legend = c("SS","AA","DD","AABB","AABBDD"),col = c("green","brown","purple","yellow","red",rgb(0,255/255,255/255),rgb(255/255,0,255/255)),pch=20,cex=1,box.lty=0) 49 | dev.off() 50 | 51 | -------------------------------------------------------------------------------- /01_Vmap1.1R/32_NeSize.r: -------------------------------------------------------------------------------- 1 | library(RColorBrewer) 2 | #color <- brewer.pal(5, "RdBu")[c(2,5)] 3 | #color <- append(color,brewer.pal(5, "PuOr")[c(2,5)]) 4 | #brewer.pal(5, "PiYG")[c(2,5)] 5 | #brewer.pal(11, "RdYlBu")[1] 6 | color <- c(brewer.pal(5, "RdBu")[c(2,5)],brewer.pal(5, "PuOr")[c(2,5)],brewer.pal(5, "PiYG")[c(2,5)],brewer.pal(11, "RdYlBu")[c(10,11)]) 7 | setwd("/Users/guoyafei/Documents/01_个人项目/02_Migration/02_数据表格/01_Vmap1-1/01_Add_ZNdata/05_Environment/Ne") 8 | 9 | setwd("~/Desktop/") 10 | ####改格式 11 | library(RColorBrewer) 12 | library(cowplot) 13 | library(viridis) 14 | display.brewer.all() 15 | brewer.pal(12,'Set3') 16 | brewer.pal(7,'PuRd') 17 | brewer.pal(7,'Blues') 18 | brewer.pal(7,'OrRd') 19 | brewer.pal(7,'Greens') 20 | a = seq(1:10) 21 | #cols = viridis(8) 22 | cols <- color 23 | plot(NULL,xlim = c(2,3),ylim = c(0,8),axes=F,xlab="",ylab="") 24 | #set.seed(2) 25 | #rom = sample(1:14,14,replace 
= F) 26 | j = 0 27 | for (i in (1:8)){ 28 | j=j+1 29 | lines(a,rep(j,10),type="l",col=cols[i],lwd = 19) 30 | } 31 | #col1 = c("#66C2A5", "#FC8D62", "#8DA0CB","#FFD92F","#E78AC3","#07dede","#E5C494") 32 | #col2 = c("#f15a24","#779970","#8dd3c7","#fcee21","#ba9bc9","#619cff","#c69c6d") 33 | col1 = c("#f5e1fa", "#C6DBEF", "#FDD49E","#9FDA3AFF","#8dd3c7","#277F8EFF","#FC8D59") 34 | #setwd("/Users/xuebozhao/Documents/LuLab/wheatSpeciation/smcpp/getpoints") 35 | dataf = read.table("/Volumes/My Passport/01_VMap1.1/02_Environment/03_Ne/count2.txt",header = T) 36 | unique(dataf$Subspecies) 37 | datafd <- dataf[which(dataf$Subspecies != "Urartu" & dataf$Subspecies != "Speltoilds" & dataf$Time <=10000),] 38 | #datafd$Subspecies = factor(datafd$Subspecies,levels=c("Urartu","Speltoilds","Strangulata","Wild_emmer", 39 | # "Domesticated_emmer","Free_threshing","Bread_wheat")) 40 | 41 | datafd$Subspecies = factor(datafd$Subspecies,levels=c("Strangulata","Wild_emmer", 42 | "Domesticated_emmer","Free_threshing","Bread_wheat")) 43 | 44 | 45 | #col1 <- c(brewer.pal(10, "RdBu")[c(3,4,5,7,8,9)],brewer.pal(8, "Set1")[6]) 46 | 47 | col1 <- c(brewer.pal(10, "RdBu")[c(3,4,7,8)],brewer.pal(8, "Set1")[6]) 48 | 49 | col1 <- c(brewer.pal(5, "RdBu")[5],brewer.pal(5, "PuOr")[5],brewer.pal(5, "PiYG")[5],brewer.pal(5, "RdBu")[2],brewer.pal(5, "PuOr")[2],brewer.pal(5, "PiYG")[2],brewer.pal(11, "RdYlBu")[11]) 50 | 51 | p1 = ggplot(datafd, aes(x = Time,y = Contribution,fill = Subspecies))+ 52 | ####position="stack"堆叠状 53 | ####position="stack" 改为"postion_stack(reverse=T) 反向堆叠 54 | geom_bar(stat ="identity",width = 800,position ="stack")+ 55 | scale_fill_manual(values = col1)+ 56 | labs(x = "",y = "All effective population size",size = rel(1)) + 57 | #geom_text(aes(label = datafd$Num),position=position_stack(vjust =0.5),size = 5)+ 这行代码是显示个数的数字 58 | guides(fill = guide_legend(reverse = F))+ 59 | theme_bw()+ 60 | theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1,size = rel(0.8))) + 61 | theme(panel.grid=element_blank(),panel.border=element_blank(),axis.line=element_line(size=0.5,colour="black")) 62 | 63 | ggplot(datafd, aes(x = Time,y = Contribution,fill = Subspecies))+ 64 | ####position="stack"堆叠状 65 | ####position="stack" 改为"postion_stack(reverse=T) 反向堆叠 66 | geom_bar(stat ="identity",width = 800,position ="fill")+ 67 | scale_fill_manual(values = col1)+ 68 | labs(x = "",y = "All effective population size",size = rel(1)) + 69 | #geom_text(aes(label = datafd$Num),position=position_stack(vjust =0.5),size = 5)+ 这行代码是显示个数的数字 70 | guides(fill = guide_legend(reverse = F))+ 71 | theme_bw()+ 72 | theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1,size = rel(0.8))) + 73 | theme(panel.grid=element_blank(),panel.border=element_blank(),axis.line=element_line(size=0.5,colour="black")) 74 | -------------------------------------------------------------------------------- /01_Vmap1.1R/33_vf_pipe.sh: -------------------------------------------------------------------------------- 1 | #############################################volcano Finder####################################### 2 | # VolcanoFinder is a program to perform genome-wide scans of adaptive introgression by using the 3 | # spatial distribution of within-species polymorphism and between species substitution in the genome. 
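# (added note, not part of the original pipeline description)
# Overall flow of this script: (1) count REF/ALT alleles per site with
# vcftools --counts, (2) polarize those counts against the ancestral-state
# BED files to build FreqFile, (3) collapse FreqFile into the site frequency
# spectrum (SpectFile), then run VolcanoFinder in blocks and merge the output.
# The SFS step below re-reads FreqFile once per allele count (1..604); a
# hedged single-pass alternative, assuming the same headerless FreqFile
# layout and the 104072 sites quoted below, would be:
# awk '{c[$2]++} END{for(i=1;i<=604;i++) print i"\t"c[i]/104072}' FreqFile > SpectFile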
4 | # input: 1.Allele frequency file 2.site frequency spectrum(sfs)3.ancestral file 5 | ##工作路径: /data1/home/xuebo/Projects/Speciation/volcanofinder 6 | 7 | ###### installation ###### 8 | wget -c http://degiorgiogroup.fau.edu/vf.html 9 | tar -xzvf volcanofinder_v1.0.tar.gz 10 | cd volcanofinder_v1.0 11 | make 12 | vim ~/.bashrc 13 | source ~/.bashrc 14 | 15 | ###### prepare allele frequency file ###### 16 | # 1.calculate allele numbers of ref and al 17 | # ex:chr1 821443 821444 T 1137 C 3 18 | vcftools --gzvcf /data1/home/xuebo/Projects/Speciation/E3/chr36.all.vcf.gz --counts --out chr36_part1 19 | cat chr36_part1.frq.count |tail -n +2 |awk '{print $1"\t"$2-1"\t"$2"\t"substr($5,1,1)"\t"substr($5,3,length($5))"\t"substr($6,1,1)"\t"substr($6,3,length($6))}'>chr36_part1.temp 20 | # 2.extract and non-polarized sites to obtain Freq file 21 | # ex:position x n folded 22 | # 460000 9 100 0 23 | # 460010 100 100 0 24 | # 460210 30 78 1 25 | awk '{print $1"\t"$2-1"\t"$2"\t"$3}' A_pos_ances.txt > A_pos_ances.bed 26 | awk '{print $1"\t"$2-1"\t"$2"\t"$3}' B_pos_ances.txt > B_pos_ances.bed 27 | awk '{print $1"\t"$2-1"\t"$2"\t"$3}' D_pos_ances.txt > D_pos_ances.bed 28 | mkdir -p Dsplit_ances 29 | for chr in `cut -f 1 D_pos_ances.bed | sort | uniq`; do 30 | grep -w $chr D_pos_ances.bed > Dsplit_ances/$chr.pos_ances.bed 31 | done 32 | ***intersect,取交集,并且找出谁是ancestral,格式是pos\tderived\tderived+ancestral 33 | bedtools intersect -a ../anc/Dsplit_ances/36.pos_ances.bed -b chr36_part1.temp -wo |awk '{if($4==$8)print $2"\t"$11"\t"$11+$9"\t""0";else print $2"\t"$9"\t"$9+$11"\t""0" }' >polarized.freq 34 | ***找出-a里面的特有的pos 35 | bedtools subtract -b ../anc/Dsplit_ances/36.pos_ances.bed -a chr36_part1.temp |awk '{print $2"\t"$7"\t"$5+$7"\t""1"}' >>polarized.freq 36 | # remove undistinguished sites 37 | awk '{if($2==0)print $0 }' polarized.freq >x 38 | awk '{if($2==$3&&$4==1)print $0 }' polarized.freq >>x 39 | sort -k1,1n x>xx 40 | awk 'FNR==NR{a[$0]++;next}(!($0 in a))' xx polarized.freq >FreqFile 41 | sort -k1,1n FreqFile |sed '1i position\tx\tn\tfolded' > FreqFilesort 42 | # 3.sum to sfs Spect file 43 | # 604 is alleles number 44 | for var in {1..604};do awk -v j=$var '{if($2==j)print $2}' FreqFile| wc -l >> tempp ; done 45 | 46 | 47 | # 104072 is snp number zcat /data1/home/xuebo/Projects/Speciation/E3/chr36.all.vcf.gz | grep -v "#" | wc -l 48 | awk '{print NR"\t"$1/104072}' tempp >SpectFile 49 | 50 | ###### run VolcanoFinder by blocks ####### 51 | # g:block length(ex:1000bp) G:step length(ex:1000snp) 52 | #ex:VolcanoFinder -big 1000 FreqFile SpectFile -1 1 1 xbsj 1 50 53 | #VolcanoFinder -big g FreqFile SpectFile D P MODEL OutFile BLOCK NBLOCK 54 | #VolcanoFinder -bi G FreqFile SpectFile D P MODEL OutFile BLOCK NBLOCK 55 | VolcanoFinder -big 100 FreqFilesort SpectFile -1 1 1 xbsj 1 50 56 | 57 | # merge results 58 | #VolcanoFinder -m xbsj 50 59 | VolcanoFinder -m OutFile NBLOCK -------------------------------------------------------------------------------- /01_Vmap1.1R/34_Vmap3.sh: -------------------------------------------------------------------------------- 1 | #Vmap3 2 | #yafei@204:/data2/yafei/Vmap3/sort 3 | #204: /data3/wgs/fastq/wildEmmer_S35 4 | zcat /data3/wgs/fastq/wildEmmer_S35/Rawdata/SFS1_1.fq.gz | paste - - - - | sort -k1,1 -t " " | tr '\t' '\n' | gzip -c > /data2/yafei/Vmap3/sort/SFS1_1.sorted.fq.gz 5 | zcat /data3/wgs/fastq/wildEmmer_S35/Rawdata/SFS1_2.fq.gz | paste - - - - | sort -k1,1 -t " " | tr '\t' '\n' | gzip -c > /data2/yafei/Vmap3/sort/SFS1_2.sorted.fq.gz 6 | 7 | zcat 
/data1/home/liping/gasdata/cleandata/CRR061687_f1.filtered.fq.gz |grep "@" |sort |uniq -c > /data2/liping/Vmap3/CRR061687_f1.id 8 | zcat /data1/home/liping/gasdata/cleandata/CRR061687_r2.filtered.fq.gz |grep "@" |sort |uniq -c > /data2/liping/Vmap3/CRR061687_r2.id 9 | zcat /data1/home/liping/gasdata/cleandata/CRR061717_f1.filtered.fq.gz |grep "@" |sort |uniq -c > /data2/liping/Vmap3/CRR061717_f1.id 10 | zcat /data1/home/liping/gasdata/cleandata/CRR061717_r2.filtered.fq.gz |grep "@" |sort |uniq -c > /data2/liping/Vmap3/CRR061717_r2.id 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /01_Vmap1.1R/35_ChrBin.R: -------------------------------------------------------------------------------- 1 | library(CMplot) 2 | setwd("/Users/guoyafei/Documents/个人项目/Project-4-VmapIII/Fastcall2/测试数据/") 3 | 4 | mydata<-read.table("/Users/guoyafei/Documents/个人项目/Project-4-VmapIII/Fastcall2/测试数据/fastcall2_001_pos.txt",header=TRUE,sep="\t") 5 | head(mydata) 6 | # snp chr pos 7 | # snp1_1 1 2041 8 | # snp1_2 1 2062 9 | # snp1_3 1 2190 10 | CMplot(mydata,plot.type="d",bin.size=1e4,col=c("darkgreen","yellow", "red"),file="jpg",memo="snp_density",dpi=300) 11 | mydata<-read.table("/Users/guoyafei/Documents/个人项目/Project-4-VmapIII/Fastcall2/测试数据/fastcall_001_pos.txt",header=TRUE,sep="\t") 12 | head(mydata) 13 | # snp chr pos 14 | # snp1_1 1 2041 15 | # snp1_2 1 2062 16 | # snp1_3 1 2190 17 | CMplot(mydata,plot.type="d",bin.size=1e4,col=c("darkgreen","yellow", "red"),file="jpg",memo="snp_density",dpi=300) 18 | -------------------------------------------------------------------------------- /01_Vmap1.1R/36_LDJump.R: -------------------------------------------------------------------------------- 1 | require(LDJump) 2 | results = LDJump("/data1/home/yafei/LDJump-test/HatLandscapeN16Len1000000Nrhs15_th0.01_540_1.fa", alpha = 0.05, segLength = 1000, pathLDhat = "/data1/home/yafei/Software/LDhat-master/", pathPhi = "/data1/home/yafei/Software/PhiPack/Phi", format = "fasta", refName = NULL) 3 | postscript("Results.eps", horiz = F) 4 | plot(results[[1]], xlab = "Segments", ylab = "Estimated Recombination Rate", main = "Estimated recombination map with LDJump") 5 | dev.off() -------------------------------------------------------------------------------- /01_Vmap1.1R/37_permut.R: -------------------------------------------------------------------------------- 1 | set.seed(98765) 2 | N <- 100 3 | r.mat <- matrix(rexp(N^2), ncol=N) 4 | cv.mat <- crossprod(r.mat) # a positive definite covariance matrix 5 | x.mat <- rep(1, N) 6 | r.mean <- 3 7 | r1 <- rnorm(N) # independent random noise 8 | r.data <- x.mat * r.mean + drop( t(chol(cv.mat)) %*% r1 ) # correlated random noise 9 | r.perm <- mvnpermute( r.data, x.mat, cv.mat, nr=1000, seed=1234) 10 | est.cvm <- cov(t(r.perm)) 11 | plot(cv.mat, est.cvm); 12 | abline(0,1) 13 | -------------------------------------------------------------------------------- /01_Vmap1.1R/38_Venn.R: -------------------------------------------------------------------------------- 1 | **统计每个文件里的未分离位点 2 | setwd("/data1/home/yafei/Project3/Download") 3 | EC <- read.table("new_EC",header=T,stringsAsFactors=F) 4 | TR <- read.table("new_TR",header=T,stringsAsFactors=F) 5 | WW <- read.table("new_WW",header=T,stringsAsFactors=F) 6 | 7 | bing <- read.table("/data1/home/yafei/Project3/Vmap1.1/out.txt",header=F,stringsAsFactors=F) 8 | EC_id <- EC[,3] 9 | WW_id <- WW[,3] 10 | TR_id <- TR[,3] 11 | 12 | noSep_EC <- 
read.table("EC_noSep.txt",header=F,stringsAsFactors=F) 13 | noSep_TR <- read.table("TR_noSep.txt",header=F,stringsAsFactors=F) 14 | noSep_WW <- read.table("WW_noSep.txt",header=F,stringsAsFactors=F) 15 | 16 | #1. segregating sites within each dataset 17 | EC_sep <- EC_id[-which(EC_id %in% noSep_EC[,1])] 18 | WW_sep <- WW_id[-which(WW_id %in% noSep_WW[,1])] 19 | TR_sep <- TR_id[-which(TR_id %in% noSep_TR[,1])] 20 | #2. sites that segregate in the other datasets 21 | TR_WW_V <- c(TR_sep,WW_sep,bing[,1]) 22 | EC_WW_V <- c(EC_sep,WW_sep,bing[,1]) 23 | EC_TR_V <- c(EC_sep,TR_sep,bing[,1]) 24 | #3. sites that are truly non-segregating in each dataset 25 | noSep_real_EC <- noSep_EC[-which(noSep_EC[,1] %in% TR_WW_V),1] 26 | noSep_real_TR <- noSep_TR[-which(noSep_TR[,1] %in% EC_WW_V),1] 27 | noSep_real_WW <- noSep_WW[-which(noSep_WW[,1] %in% EC_TR_V),1] 28 | #4. sites that are truly segregating in each dataset 29 | Sep_real_EC <- EC_id[-which(EC_id %in% noSep_real_EC)] 30 | Sep_real_TR <- TR_id[-which(TR_id %in% noSep_real_TR)] 31 | Sep_real_WW <- WW_id[-which(WW_id %in% noSep_real_WW)] 32 | #draw Venn diagrams 33 | #three datasets 34 | Length_EC<-length(Sep_real_EC) 35 | Length_TR<-length(Sep_real_TR) 36 | Length_WW<-length(Sep_real_WW) 37 | Length_EC_TR<-length(intersect(Sep_real_EC,Sep_real_TR)) 38 | Length_WW_TR<-length(intersect(Sep_real_WW,Sep_real_TR)) 39 | Length_EC_WW<-length(intersect(Sep_real_EC,Sep_real_WW)) 40 | library(VennDiagram) 41 | T<-venn.diagram(list(EC_86=Sep_real_EC,TR_28=Sep_real_TR,WW_741=Sep_real_WW),filename=NULL,lwd=1,lty=2,col=c('red','green','blue'),fill=c('red','green','blue'),cat.col=c('red','green','blue'),reverse=TRUE) 42 | pdf("Venn.pdf") 43 | grid.draw(T) 44 | dev.off() 45 | #two datasets (example pair: EC vs TR, using the lengths computed above) 46 | pdf("Venn2.pdf") 47 | draw.pairwise.venn(area1=Length_EC,area2=Length_TR,cross.area=Length_EC_TR,category=c('EC','TR'),lwd=rep(1,1),lty=rep(2,2),col=c('red','green'),fill=c('red','green'),cat.col=c('red','green'),rotation.degree=90) 48 | dev.off() 49 | 50 | write.table(Sep_real_EC,"Sep_real_EC", quote=F,row.names=F) 51 | write.table(Sep_real_TR,"Sep_real_TR", quote=F,row.names=F) 52 | write.table(Sep_real_WW,"Sep_real_WW", quote=F,row.names=F) 53 | 54 | #write the hapscanner input files: pos.txt & posAllele.txt 55 | Real_WW <- read.table("Sep_real_WW", header=F,stringsAsFactors=F) 56 | selec <- WW[which(WW$ID %in% Real_WW[,1]),] 57 | posAllele <- selec[,c(1,2,4,5)] 58 | colnames(posAllele) <- c("Chr","Pos","Ref","Alt") 59 | pos <- selec[,c(1,2)] 60 | colnames(pos) <- c("Chr","Pos") 61 | 62 | for(i in 1:42){ 63 | a <- posAllele[which(posAllele$Chr == i),] 64 | fileA <- paste("/data1/home/yafei/Project3/Download/Hapscanner/chr",i,"_posAllele.txt",sep="") 65 | write.table(a,fileA,row.names=F,quote=F,sep="\t") 66 | b <- pos[which(pos$Chr == i),] 67 | fileB <- paste("/data1/home/yafei/Project3/Download/Hapscanner/chr",i,"_pos.txt",sep="") 68 | write.table(b,fileB,row.names=F,quote=F,sep="\t") 69 | } 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /01_Vmap1.1R/43_est.sh: -------------------------------------------------------------------------------- 1 | #---------------------------Dlineage_PopDiv_no.est--------------------------- 2 | // Priors and rules file 3 | // ********************* 4 | 5 | [PARAMETERS] 6 | //#isInt?
#name #dist.#min #max 7 | //all Ns are in number of haploid individuals 8 | 1 pop1 logunif 100 100000 output 9 | 1 pop2 logunif 100 100000 output 10 | 0 RSANC logunif 0.1 100 output 11 | 0 TPROP logunif 0.0001 0.5 output 12 | 13 | [RULES] 14 | 15 | [COMPLEX PARAMETERS] 16 | #---------------------------------- Dlineage_PopDiv_no.tpl ------------------------------- 17 | //Parameters for the coalescence simulation program : fastsimcoal.exe 18 | 2 samples to simulate : 19 | //Population effective sizes (number of genes) 20 | pop1 21 | pop2 22 | //Haploid samples sizes 23 | 40 24 | 40 25 | //Growth rates: negative growth implies population expansion 26 | 0 27 | 0 28 | //Number of migration matrices : 0 implies no migration between demes 29 | 0 30 | //historical event: time, source, sink, migrants, new deme size, new growth rate, migration matrix index 31 | 2 historical event 32 | 10000 1 0 1 RSANC 0 0 33 | 6500 0 1 0 RSANC 0.0002 0 34 | //Number of independent loci [chromosome] 35 | 1 0 36 | //Per chromosome: Number of contiguous linkage Block: a block is a set of contiguous loci 37 | 1 38 | //per Block:data type, number of loci, per generation recombination and mutation rates and optional parameters 39 | FREQ 1 0 6.5e-9 OUTEXP 40 | 41 | #run 42 | ./fsc26 -t Dlineage_PopDiv_diff.tpl -e Dlineage_PopDiv_diff.est -d -0 -n 100000 -L 40 -s 0 -M -q -c80 43 | -d:dsfs 44 | -0:不考虑 SFS 似然计算的单态位点 45 | -C:在似然计算中考虑的最小观察到的 SFS 条目计数(默认值 = 1,但值可以 < 1。例如 0.5) 46 | -n:要执行的模拟次数,也应用于参数估计 47 | -L:在 lhood 最大化期间要执行的循环(ECM 循环)数。 默认为 20 48 | -s:输出 DNA 作为 SNP 数据,并指定最大数量。 要输出的 SNP(使用 0 输出所有 SNP)。 49 | -M:从迭代之间的 SFS 值通过 maxlhood 执行参数估计 50 | -q:quiet 51 | -c:用于参数估计的openMP线程数(默认=1,max=numBatches,使用0让openMP选择最优值) 52 | 53 | -i:参数文件的名字 54 | -E:从参数先验中抽取的次数(可选), 在模板文件中替换列出的参数值 55 | -g:使用基因型数据生成 arlequin 项目 56 | -S:输出整个 DNA 序列,包括单态位点 57 | -I:根据无限位点 (IS) 突变模型生成 DNA 突变 58 | -m:msfs 59 | 60 | Historical events can be used to: 61 | • Change the size of a given population. 62 | • Change the growth rate of a given population. 63 | • Change the migration matrix to be used between populations. 64 | • Move a fraction of the genes of a given population to another population. This amounts to 65 | implementing a (stochastic) admixture or introgression event. 66 | • Move all genes from a population to another population. This amounts to fusing two 67 | populations into one, looking backward in time, or implementing a populations’ fission 68 | looking forward in time. 69 | • One or more of these events at the same time 70 | 71 | -------------------------------------------------------------------------------- /01_Vmap1.1R/45_Admixture.sh: -------------------------------------------------------------------------------- 1 | #按MAF>0.05和缺失率<0.1过滤 2 | plink --vcf test.imputed.vcf --maf 0.05 --geno 0.1 --recode vcf-iid --out test.filter --allow-extra-chr --double-id --autosome-num 42 3 | --keep-only-indels 4 | #对VCF进行LD筛选 5 | plink --vcf test.filter.vcf --indep-pairwise 50 10 0.2 --out test.filterLD --allow-extra-chr --double-id --autosome-num 42 6 | 7 | #提取筛选结果 8 | #plink --vcf test.filter.vcf --make-bed --extract test.filterLD.prune.in --out test.filter.prune.in --double-id --autosome-num 42 9 | plink --vcf test.filter.vcf --extract test.filterLD.prune.in --recode vcf-iid --out test.filter.prune.in --double-id --autosome-num 42 10 | 11 | #转换成structure/admixture格式 12 | plink --bfile test.filter.prune.in --recode structure --out test.filter.prune.in #生成. 
recode.strct_in为structure输入格式 13 | plink --bfile test.filter.prune.in --recode 12 --out test.filter.prune.in --autosome-num 42 #生成.ped为admixture输入格式 14 | plink --bfile test.filter.prune.in --recode vcf --out test.filter.prune.in --autosome-num 42 15 | #admixture 16 | admixture --cv test.filter.ped 1 >>log.txt 17 | admixture --cv test.filter.ped 2 >>log.txt 18 | ....... 19 | admixture --cv test.filter.ped 13 >>log.txt 20 | wait 21 | grep "CV error" log.txt > k_1to13 22 | 23 | 24 | -------------------------------------------------------------------------------- /01_Vmap1.1R/49_Variants_Age.sh: -------------------------------------------------------------------------------- 1 | Working Directory: 2 | xuebo@204:/data2/xuebo/Projects/Speciation/Variant_Age/Chr 3 | xuebo@204:/data2/xuebo/Projects/Speciation/Variant_Age/Lineage 4 | xuebo@204:/data2/xuebo/Projects/Speciation/E6/Landrace_locate_225/Landrace_225_noAM_220_maf001/LD/chr6.lineage_Landrace_225_noAM_220_maf001_LD.vcf 5 | 6 | for i in {1..42} 7 | do 8 | bash 42-21.sh /data2/xuebo/Projects/Speciation/E6/Landrace_locate_225/Landrace_225_noAM_220_maf001/LD/chr${i}.lineage_Landrace_225_noAM_220_maf001_LD.vcf > chr${i}.vcf & 9 | done 10 | vcf-concat chr1.vcf chr2.vcf | bgzip -c > chr1A.vcf.gz 11 | 12 | #step1 转格式(vcf文件染色体号不能有字母,重组率文件必须有chr) 13 | #yafei@204:/data1/home/yafei/Test 14 | plink2 --vcf A_position.vcf.recode.vcf --allow-extra-chr --alt1-allele 'force' A_Alt.Anc.vcf.gz 4 3 '#' --export vcf --out A_Anc --autosome-num 42 15 | plink2 --vcf B_position.vcf.recode.vcf --allow-extra-chr --alt1-allele 'force' B_Alt.Anc.vcf.gz 4 3 '#' --export vcf --out B_Anc --autosome-num 42 16 | plink2 --vcf D_position.vcf.recode.vcf --allow-extra-chr --alt1-allele 'force' D_Alt.Anc.vcf.gz 4 3 '#' --export vcf --out D_Anc --autosome-num 42 17 | #xuebo@204:/data2/xuebo/Projects/Speciation/Variant_Age/Chr 18 | 19 | for i in {1..7} 20 | do 21 | for j in {"A","B","D"} 22 | do 23 | geva_v1beta --vcf ../Chr/chr${i}${j}.vcf.gz --map recombination_rate_geva2.txt --out chr${i}${j}_rec 24 | done 25 | done 26 | 27 | for i in `ls *marker.txt` 28 | do 29 | awk '{print $3}' ${i} | sed '1,2d' |sed '$d' > ${i::-11}.pos & 30 | done 31 | 32 | #step2 运行 33 | for i in {1..7} 34 | do 35 | for j in {"A","B","D"} 36 | do 37 | geva_v1beta -i chr${i}${j}_rec.bin -o ${i}${j}_RUN1 --positions chr${i}${j}_rec.pos --Ne 70000 --mut 6.5e-9 --hmm /data1/home/xuebo/software/geva-master/hmm/hmm_initial_probs.txt /data1/home/xuebo/software/geva-master/hmm/hmm_emission_probs.txt 38 | done 39 | done 40 | 41 | for i in {1..7} 42 | do 43 | for j in {"A","B","D"} 44 | do 45 | Rscript /data1/home/xuebo/software/geva-master/estimate.R ${i}${j}_RUN1.pairs.txt 70000 46 | done 47 | done 48 | 49 | ---------------------------------------------本地画图--------------------------------------------- 50 | xuebo@204:/data2/xuebo/Projects/Speciation/Variant_Age/rec 51 | 52 | setwd("/Users/guoyafei/Documents/01_Migration/02_Environment/06_Allele_Age") 53 | 54 | for i in `ls *rec.marker.txt`; do sed '2d' $i | sed '$d' > ${i::-11}.txt; done 55 | for i in `ls *RUN1.sites2.txt`; do grep "J" $i | sed '1i MarkerID\tClock\tN_Concordant\tN_Discordant\tPostMode' > chr${i::-11}.txt; done 56 | -------------------------------------------------------------------------------- /01_Vmap1.1R/53_37gene_haplotype.r: -------------------------------------------------------------------------------- 1 | #统一设置:注释内容及颜色 2 | library(pheatmap) 3 | require(reshape) 4 | require (rworldmap) 5 | require(rworldxtra) 6 | library(RColorBrewer) 7 | 
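# (added note, inferred from the pheatmap calls below rather than stated in the original)
# The per-gene matrices read in this script appear to be -1/0/1/2 coded genotypes
# (vcftools --012 style: missing, hom-ref, het, hom-alt); the four colours defined
# next are mapped to those codes through legend_breaks = -1:2 and
# legend_labels = c("./.", "0/0", "0/1", "1/1").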
cols <- c("#225EA8","#DEEBF7","#FEB24C","#BD0026") 8 | setwd("/Users/guoyafei/Documents/01_VMap1.1/02_Environment/08_snpEff/VCF_withAn/225_TXT") 9 | #annotation_col <- read.table("/Users/guoyafei/Documents/01_个人项目/02_Migration/02_数据表格/01_Vmap1-1/01_Add_ZNdata/05_Environment/XP-CLR/E6_Landrace_locate_225.txt",header=T,stringsAsFactors = T) 10 | #rownames(annotation_col) = c(1:225) 11 | annotation <- read.table("/Users/guoyafei/Documents/01_VMap1.1/02_Environment/01_RDA_plot/select_taxa4.txt", header=T, stringsAsFactors = F, sep="\t") 12 | sub <- annotation[!is.na(annotation$ID),] 13 | rownames(sub) <- sub$ID 14 | anno <- sub[,7,drop=FALSE] 15 | #-------------------------------------------------------------------------------------------------------------------------------------- 16 | #AB 17 | AB_name <- read.table("/Users/guoyafei/Documents/01_VMap1.1/02_Environment/08_snpEff/VCF_withAn/225_TXT/AB_name.txt",header=F,stringsAsFactors = F) 18 | AB_anno <- anno[which(rownames(anno) %in% AB_name[,1]),1,drop=F] 19 | ann_color = list( 20 | heatmap = c(Wild_emmer = "#8C510A", Domesticated_emmer = "#DFC27D", Freethreshing = "#F6E8C3", EU="#66C2A5", WA= "#FC8D62",CA="#8DA0CB", EA ="#E78AC3",SA="#A6D854",Tibet="#FFD92F")) 21 | path <- "/Users/guoyafei/Documents/01_VMap1.1/02_Environment/08_snpEff/VCF_withAn/225_TXT/AB" ##文件目录 22 | fileNames <- dir(path) ##获取该路径下的文件名 23 | filePath <- sapply(fileNames, function(x){ 24 | paste(path,x,sep='/')}) ##生成读取文件路径 25 | data <- lapply(filePath, function(x){ 26 | read.table(x, header=F,stringsAsFactors = F)}) 27 | AB_file <-read.table("AB_file_name.txt",header=F,stringsAsFactors = F) 28 | AB_tit <- AB_file[,1] 29 | pdf("test.pdf") 30 | for (i in c(1:length(data))){ 31 | all <- as.data.frame(data[[i]]) 32 | colnames(all) <- AB_name[,1] 33 | all2 <- all[, rownames(AB_anno)] 34 | pheatmap(all2, show_rownames=FALSE, show_colnames=FALSE,color = cols, legend_breaks = -1:2, legend_labels = c("./.", "0/0", "0/1", "1/1"), cluster_col = F, cluster_row = FALSE, annotation = AB_anno, annotation_colors = ann_color,annotation_names_col = F,main=AB_tit[i]) 35 | } 36 | dev.off() 37 | 38 | #-------------------------------------------------------------------------------------------------------------------------------------- 39 | #D 40 | ann_color = list( 41 | #Growing_Habit = c(Facultative = "yellow", Spring="orange", Winter="blue"), 42 | heatmap = c(Strangulata = "#8C510A", EU="#66C2A5", WA= "#FC8D62",CA="#8DA0CB", EA ="#E78AC3",SA="#A6D854",Tibet="#FFD92F")) 43 | 44 | path <- "/Users/guoyafei/Documents/01_Migration/02_Environment/08_snpEff/VCF_withAn/225_TXT/D" ##文件目录 45 | fileNames <- dir(path) ##获取该路径下的文件名 46 | filePath <- sapply(fileNames, function(x){ 47 | paste(path,x,sep='/')}) ##生成读取文件路径 48 | data <- lapply(filePath, function(x){ 49 | read.table(x, header=F,stringsAsFactors = F)}) 50 | 51 | D_name <- read.table("/Users/guoyafei/Documents/01_Migration/02_Environment/08_snpEff/VCF_withAn/225_TXT/D_name.txt",header=F,stringsAsFactors = F) 52 | D_anno <- anno[which(rownames(anno) %in% D_name[,1]),1,drop=F] 53 | D_file <-read.table("D_file_name.txt",header=F,stringsAsFactors = F) 54 | D_tit <- D_file[,1] 55 | pdf("D.pdf") 56 | for (i in c(1:length(data))){ 57 | all <- as.data.frame(data[[i]]) 58 | colnames(all) <- D_name[,1] 59 | all <- all[, rownames(D_anno)] 60 | pheatmap(all, show_rownames=FALSE, show_colnames=FALSE,color = cols, legend_breaks = -1:2, legend_labels = c("./.", "0/0", "0/1", "1/1"), cluster_col = F, cluster_row = FALSE, annotation = D_anno, annotation_colors = 
ann_color,annotation_names_col = F,main=D_tit[i]) 61 | } 62 | dev.off() 63 | 64 | #annotation_colors = ann_colors 65 | #接下来可以使用06_Qmatrix_PieMap.r来画在地图上的单倍型分布。 66 | 67 | #plot haplotype heatmap 68 | brewer.pal(9, "YlGnBu")[c(7)] 69 | "#225EA8" 70 | brewer.pal(9, "Blues")[c(2)] 71 | "#DEEBF7" 72 | brewer.pal(9, "YlOrRd")[c(4)] 73 | "#FEB24C" 74 | brewer.pal(9, "YlOrRd")[c(8)] 75 | "#BD0026" 76 | cols <- c("#225EA8","#DEEBF7","#FEB24C","#BD0026") 77 | -------------------------------------------------------------------------------- /01_Vmap1.1R/57_Gene_HaploType_Map.r: -------------------------------------------------------------------------------- 1 | #work directory: 2 | #/Users/guoyafei/Documents/01_Migration/01_BasicStatistic/06_Structure/sample_group.txt 3 | #/data1/home/yafei/003_Project3/bayenv/13pop/candidate_gene 4 | #/data2/yafei/003_Project3/Vmap1.1/E6/Landrace_locate_225 5 | #Rht-B1: 6 | #TraesCS4B02G043100 7 | #chr21 start:30861268 end:30863723 8 | #ppd-D1 9 | #TraesCS2D01G079600 10 | #chr11 start:33952048 end:33956269 11 | cat *cloned.gene | sort | uniq -c | awk '{print $2"\t"$3}' > top5.candidate.txt 12 | awk '{print $2}' top5.candidate.txt > test 13 | grep -f test gene_v1.1_Lulab.gff3 |awk '{print $1"\t"$4"\t"$5"\t"$9}' |awk -F";" '{print $1}' |sed 's/ID=//' > test2 14 | awk 'NR==FNR{a[$2]=$1;b[$2]=$2} NR!=FNR {if($4 in b) {print $0"\t"a[$4]}}' top5.candidate.txt test2 | awk '{print $1"\t"$2-5000"\t"$3+5000"\t"$4"\t"$5}' |sed '1i Chr\tstart-5k\tend+5k\tID\tName' > gene_region.txt 15 | 16 | cat gene_region.txt |while read chr from to ID Name 17 | do 18 | if [ $chr != "Chr" ];then 19 | bcftools filter /data2/yafei/003_Project3/Vmap1.1/E6/Landrace_locate_225/chr${chr}.E6_Landrace_locate.vcf.gz --regions ${chr}:${from}-${to} > ${ID}-${Name}.vcf 20 | fi 21 | done 22 | for i in `ls *vcf` 23 | do 24 | 25 | vcftools --vcf $i --012 26 | paste out.012.pos <(cat out.012 | datamash transpose | sed '1d' ) > ${i::-4}.vcf.txt 27 | cat out.012.indv | datamash transpose | awk '{print "File\tChr\tPos\t"$0}' > indi.txt 28 | rm out.012* 29 | done 30 | for i in `ls *vcf.txt` 31 | do 32 | awk '{print FILENAME"\t"$0}' $i | sed 's/.vcf.txt//g' > ${i::-4}.vcf.txt2 33 | done 34 | cat indi.txt *vcf.txt2 > all.pos.txt 35 | rm TraesCS* 36 | 37 | library(reshape) 38 | library(ggplot2) 39 | library(RColorBrewer) 40 | require (rworldmap) 41 | setwd("/Users/guoyafei/Documents/01_Migration/01_BasicStatistic/06_Structure/") 42 | data <- read.table("/Users/guoyafei/Documents/01_Migration/02_Environment/01_RDA_plot/225.taxa.txt", header=T,row.names = 1, sep="\t", stringsAsFactors = F) 43 | taxa <- read.table("/Users/guoyafei/Documents/01_Migration/01_BasicStatistic/06_Structure/20210829/Cluster_Location/cluster_70.txt",header=T,row.names = 1, stringsAsFactors = F) 44 | for(i in names(table(data$File))){ 45 | sub <- data[which(data$File==i),] 46 | file <- paste(i,".pdf",sep="") 47 | pdf(file) 48 | for( j in c(1:dim(sub)[1])){ 49 | tit <- sub[j,3] 50 | sample <- sub[j,4:228] 51 | loc <- taxa[names(sample),4:5] 52 | loc$type <- as.numeric(sample[1,]) 53 | loc$value <- 1 54 | wheat_reshape <- cast(loc,Latitude_70+Longitude_70~type) 55 | wheat_reshape2 <- as.data.frame(wheat_reshape) 56 | name <- names(table(loc$type)) 57 | if((j-1)%%4 !=0){ 58 | mapPies(wheat_reshape2,xlim=c(-10,130),ylim=c(10,70),nameX="Longitude_70",nameY="Latitude_70",nameZs=name,symbolSize=1.5, 59 | zColours=brewer.pal(8, "Set3")[c(2,3,4,5)],barOrient='vert',oceanCol="white",landCol=brewer.pal(9,"Pastel1")[9],main="tit") 60 | 
legend(25,95,box.lty=0,bg="transparent","108 landrace GrowHabbit", col="black") 61 | }else{ 62 | par(mfrow=c(2,2),oma=c(1,1,1,1), mar=c(0,1,0,1), cex=1) 63 | mapPies(wheat_reshape2,xlim=c(-10,130),ylim=c(10,70),nameX="Longitude_70",nameY="Latitude_70",nameZs=name,symbolSize=1.5, 64 | zColours=brewer.pal(12, "Set3")[c(2,3,4,5)],barOrient='vert',oceanCol="white",landCol=brewer.pal(9,"Pastel1")[9], main="tit") 65 | legend(25,95,box.lty=0,bg="transparent",tit, col="black") 66 | } 67 | } 68 | dev.off() 69 | } 70 | -------------------------------------------------------------------------------- /01_Vmap1.1R/59_Depth.R: -------------------------------------------------------------------------------- 1 | #depth.r 2 | library(cowplot) 3 | library(ggExtra) 4 | library(forcats) 5 | library(ggplot2) 6 | library(RColorBrewer) 7 | library(aplot) 8 | setwd("/Users/guoyafei/Desktop/NP/reviewer/depth") 9 | # method 1 ------------ 10 | dataFile <- c("tetra_depth.txt.gz") 11 | df1 <- read.delim(dataFile) %>% mutate(GenomeType = "AABB") 12 | dataFile <- c("hexa_depth.txt.gz") 13 | df2 <- read.delim(dataFile)%>% mutate(GenomeType = "AABBDD") 14 | dataFile <- c("diploid_depth.txt.gz") 15 | df3 <- read.delim(dataFile)%>% mutate(GenomeType = "DD") 16 | 17 | df <- rbind(df1,df2,df3) %>% 18 | group_by(GenomeType) %>% 19 | slice_sample(.,n = 3000) %>% 20 | ungroup() 21 | 22 | data <- read.table("all_100k.depth",header=F,stringsAsFactors = F) 23 | colnames(data) <- c("Depth","SD") 24 | 25 | colB <- c('#ffd702','#fc6e6e',"#87cef9") 26 | df$GenomeType <- factor(df$GenomeType,levels = c("AABB","AABBDD","DD")) 27 | 28 | p <- ggplot(data,aes(x=AverageDepth,y=SD))+ 29 | geom_point(alpha=0.5,shape=1,size=0.8)+ 30 | xlab("Mean of depth")+ylab("SD")+ 31 | #scale_color_manual(values = colB)+ 32 | xlim(0,15)+ylim(0,10)+ 33 | theme( 34 | panel.background = element_blank(), 35 | panel.border = element_rect(colour = "black", fill=NA, size=0.7) 36 | ,legend.position = 'none' 37 | ,text = element_text(size = 14)) 38 | 39 | p 40 | p2 <- ggMarginal(p, type="density",groupColour = F, groupFill = F) 41 | p2 42 | ggsave(p2,filename = "~/Documents/test.pdf",height = 4,width = 4) 43 | 44 | # method 2 ------------ 45 | data <- read.table("all_100k.depth",header=F,stringsAsFactors = F) 46 | colnames(data) <- c("Depth","SD") 47 | data <- data[which(data$Depth > 0 ),] 48 | data <- data[which(data$Depth < 15 & data$SD < 10),] 49 | data <- data[sample(1:85255, 30000),] 50 | p1<-ggplot(data,aes(x = Depth, y = SD))+ 51 | geom_point(alpha= 0.1)+ 52 | scale_color_brewer(palette = "Set2")+ 53 | #scale_color_manual(values=c("green","blue","grey"))+ 54 | theme_bw() 55 | #geom_hline(yintercept = 3.86971,lty="dashed")+ 56 | #geom_hline(yintercept = 6.15844,lty="dashed")+ 57 | #geom_vline(xintercept = 3.91843,lty="dashed")+ 58 | #geom_vline(xintercept = 9.36970,lty="dashed") 59 | p2<-ggplot(data,aes(Depth))+ 60 | geom_density(fill="grey",alpha=0.5)+ 61 | scale_y_continuous(expand = c(0,0))+ 62 | theme_minimal()+ 63 | theme(axis.title = element_blank(), 64 | axis.text = element_blank(), 65 | axis.ticks = element_blank()) 66 | p3<-ggplot(data,aes(SD))+ 67 | geom_density(fill="grey",alpha=0.5)+ 68 | scale_y_continuous(expand = c(0,0))+ 69 | theme_minimal()+ 70 | theme(axis.title = element_blank(), 71 | axis.text = element_blank(), 72 | axis.ticks = element_blank())+ 73 | coord_flip() 74 | #累计分布函数 75 | p4<-ggplot(data, aes(Depth)) + 76 | theme_minimal()+ 77 | theme(axis.title = element_blank(), 78 | axis.text = element_blank(), 79 | axis.ticks = 
element_blank())+ 80 | scale_color_brewer(palette = "Set2")+ 81 | stat_ecdf()+ 82 | scale_y_reverse() 83 | p5<-ggplot(data, aes(SD)) + 84 | scale_color_brewer(palette = "Set2")+ 85 | theme_minimal()+ 86 | theme(axis.title = element_blank(), 87 | axis.text = element_blank(), 88 | axis.ticks = element_blank())+ 89 | stat_ecdf()+ 90 | coord_flip()+ 91 | scale_y_reverse() 92 | p1%>% 93 | insert_top(p2,height = 0.3)%>% 94 | insert_right(p3,0.3) 95 | 96 | -------------------------------------------------------------------------------- /01_Vmap1.1R/60_response.R: -------------------------------------------------------------------------------- 1 | # Correlation coefficients 2 | library(pheatmap) 3 | require(reshape) 4 | require (rworldmap) 5 | require(rworldxtra) 6 | library(RColorBrewer) 7 | setwd("/Users/guoyafei/Desktop/btr1/ibs") 8 | input <- c("btr1-A.10k.all.ibs","btr1-A.1M.all.ibs","btr1-A.200k.all.ibs","btr1-A.2k.all.ibs","btr1-A.500k.all.ibs","btr1-B.10k.all.ibs","btr1-B.1M.all.ibs","btr1-B.200k.all.ibs","btr1-B.2k.all.ibs","btr1-B.500k.all.ibs") 9 | output <- c("btr1-A.10k.all.ibs.pdf","btr1-A.1M.all.ibs.pdf","btr1-A.200k.all.ibs.pdf","btr1-A.2k.all.ibs.pdf","btr1-A.500k.all.ibs.pdf","btr1-B.10k.all.ibs.pdf","btr1-B.1M.all.ibs.pdf","btr1-B.200k.all.ibs.pdf","btr1-B.2k.all.ibs.pdf","btr1-B.500k.all.ibs.pdf") 10 | 11 | for(i in c(1:length(input))){ 12 | test <- read.table(input[i],row.names = 1, header = T) 13 | pdf(output[i],width=10,height=10) # one heatmap per hclust linkage method, labelled by main 14 | pheatmap(test, show_rownames=T, show_colnames=T, cluster_col = T, clustering_method="ward.D2",cluster_row = T,main="ward.D2") 15 | pheatmap(test, show_rownames=T, show_colnames=T, cluster_col = T, clustering_method="complete",cluster_row = T,main="complete") 16 | pheatmap(test, show_rownames=T, show_colnames=T, cluster_col = T, clustering_method="average",cluster_row = T,main="average") 17 | pheatmap(test, show_rownames=T, show_colnames=T, cluster_col = T, clustering_method="mcquitty",cluster_row = T,main="mcquitty") 18 | pheatmap(test, show_rownames=T, show_colnames=T, cluster_col = T, clustering_method="median",cluster_row = T,main="median") 19 | pheatmap(test, show_rownames=T, show_colnames=T, cluster_col = T, clustering_method="centroid",cluster_row = T,main="centroid") 20 | pheatmap(test, show_rownames=T, show_colnames=T, cluster_col = T, clustering_method="single",cluster_row = T,main="single") 21 | dev.off() 22 | } 23 | -------------------------------------------------------------------------------- /01_Vmap1.1R/Scripts.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /01_Vmap1.1R/assets/16442473466436.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yafei-Guo-coder/DumpNice/183d85aba75932aeacf7484386eb0cae484c7d9b/01_Vmap1.1R/assets/16442473466436.jpg -------------------------------------------------------------------------------- /02_Vmap3R/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yafei-Guo-coder/DumpNice/183d85aba75932aeacf7484386eb0cae484c7d9b/02_Vmap3R/.DS_Store
-------------------------------------------------------------------------------- /02_Vmap3R/01_Tree_Anno.r: -------------------------------------------------------------------------------- 1 | library(ggmap)#Load libraries 2 | library(ggplot2) 3 | hpars<-read.table("https://sites.google.com/ 4 | site/arunsethuraman1/teaching/hpars.dat?revision=1") 5 | 6 | par(mfrow = c(2, 2)) 7 | pie(rep(1,4),labels=c("AF","AM","AS","EU"), col = c("#8DD3C7","#FFFFB3","#BEBADA","#FB8072"), main = "Branch-Region") 8 | pie(rep(1,7),labels=c("Cultivar","Domesticated_emmer","Free_threshing_tetraploids","Landrace","OtherHexaploid","OtherTetraploid","Wild_emmer"), col = c("#836FFF","#87CEFF","#8B636C","#8DD3C7","#9B30FF","#B3DE69","#B452CD"), main = "Label-Subspecies") 9 | pie(rep(1,5),labels=c("Kong","LuLab","WEGA","ZKS","ZN"), col = c("#8DD3C7","#FB8072","#B3DE69","#8B636C","#80B1D3"), main = "Branch-Source") 10 | pie(rep(1,4),labels=c("ExclusionHexaploid","ExclusionTetraploid","Hexaploid","Tetraploid"), col = c("#8DD3C7","#FFFFB3","#BEBADA","#FB8072"), main = "Label-In-Exclu") 11 | -------------------------------------------------------------------------------- /02_Vmap3R/02_Enrich.r: -------------------------------------------------------------------------------- 1 | library(ggplot2) 2 | library(RColorBrewer) 3 | display.brewer.all() 4 | col1 <- brewer.pal(n = 8, name = "YlOrBr")[c(2,5)] 5 | col2 <- brewer.pal(n = 8, name = "YlGnBu")[c(2,5)] 6 | col3 <- brewer.pal(n = 8, name = "YlGn")[c(2,5)] 7 | col4 <- brewer.pal(n = 8, name = "Purples")[c(2,5)] 8 | col5 <- brewer.pal(n = 8, name = "Greys")[c(2,5)] 9 | 10 | #setwd("/Users/guoyafei/Documents/Lulab/Project-2-Migration/anno_gene") 11 | #nameA <- c("Domesticated_einkorn", "Domesticated_emmer","Durum","EA","EU","Indian_dwarf","Khorasan_wheat","Landrace","Macha","Persian_wheat","Polish_wheat","Rivet_wheat","SCA","Spelt","Tibetan_semi_wild","Urartu","Vavilovii","WA","Wild_Einkorn","Wild_emmer","Xinjiang_wheat") 12 | #nameB <- c("Domesticated_emmer","Durum","EA","EU","Khorasan_wheat", "Landrace","Macha","Persian_wheat","Polish_wheat","Rivet_wheat","SCA","Spelt","Speltoides","Vavilovii","WA","Wild_emmer","Xinjiang_wheat","Yunan_wheat") 13 | #nameD <- c("Anathera","Club_wheat","EA","EU","Indian_dwarf","Landrace","Macha","Meyeri","SCA","Spelt","Strangulata","Tibetan_semi_wild","Vavilovii","WA","Xinjiang_wheat","Yunan_wheat") 14 | 15 | path <- "//Users/guoyafei/Documents/01_个人项目/02_Migration/02_数据表格/01_Vmap1-1/01_Add_ZNdata/05_Environment/XP-CLR/Go/TXT" 16 | fileNames <- dir(path) 17 | filePath <- sapply(fileNames, function(x){ 18 | paste(path,x,sep='/')}) 19 | data <- lapply(filePath, function(x){ 20 | read.table(x, header=T, sep = "\t", stringsAsFactors=F)}) 21 | #for(i in 1:length(data)){ 22 | # data[[i]]$Name <- nameB[i] 23 | #} 24 | #all <- data.frame() 25 | #for(i in 1:length(data)){ 26 | # all <- rbind(all, data[[i]]) 27 | #} 28 | #plot_list = list() 29 | #for(i in 1:16){ 30 | p <- ggplot(data=data[[6]])+ 31 | geom_bar(aes(x= Description, y=Number.in.input.list, fill=(FDR)), stat='identity') + 32 | coord_flip() + 33 | #facet_grid(.~Name,scales="free") + 34 | #facet_wrap(~Name,ncol = 1)+ 35 | scale_fill_gradient(expression(FDR),low=col1[2], high = col1[1]) + 36 | ylab("Gene number") + 37 | xlab("GO term description") + 38 | #expand_limits(y=c(0,8))+ 39 | #ylim(0,100)+ 40 | theme( 41 | axis.text.x=element_text(color="black",size=rel(0.8)), 42 | axis.text.y=element_text(color="black", size=rel(0.3)), 43 | axis.title.x = element_text(color="black", size=rel(5)), 44 
| axis.title.y = element_blank(), 45 | legend.text=element_text(color="black",size=rel(0.2)), 46 | legend.title = element_text(color="black",size=rel(0.7)) 47 | #legend.position=c(0,1),legend.justification=c(-1,0) 48 | #legend.position="top", 49 | )+ 50 | # scale_x_discrete(limits= c("Domesticated_einkorn", "Domesticated_emmer","Durum","EA","EU","Indian_dwarf","Khorasan_wheat","Landrace","Macha","Persian_wheat","Polish_wheat","Rivet_wheat","SCA","Spelt","Tibetan_semi_wild","Urartu","Vavilovii","WA","Wild_Einkorn","Wild_emmer","Xinjiang_wheat"))+ 51 | theme_bw()+theme(panel.grid=element_blank(),panel.border=element_blank(),axis.line=element_line(size=1,colour="black")) 52 | #plot_list[[i]] = p 53 | #} 54 | for (i in 1:16) { 55 | file_name = paste("D",i,".pdf",sep="") 56 | pdf(file_name,height = 12,width = 9) 57 | print(plot_list[[i]]) 58 | dev.off() 59 | } 60 | 61 | -------------------------------------------------------------------------------- /02_Vmap3R/04_IBS.r: -------------------------------------------------------------------------------- 1 | #Required----- 2 | library(RColorBrewer) 3 | #读取Subspecies文件,共12个。 4 | path <- "/Users/guoyafei/Documents/01_个人项目/04_VmapIII/12_Vmap3Test/05_VcfCheck/04_TaxaGroup/03_Subspecies" 5 | fileNames <- dir(path) 6 | filePath <- sapply(fileNames, function(x){ 7 | paste(path,x,sep='/')}) 8 | data <- lapply(filePath, function(x){ 9 | read.table(x, header=T,stringsAsFactors=F)}) 10 | length(data) 11 | #A_IBS---- 12 | #读取ibs文件,并且计算每个subspecies与CS的IBS平均值。 13 | spName <- c("CS", "Cultivar", "Domesticated_emmer", "Free_threshing_tetraploid", "Landrace", "Other_hexaploid", "Other_tetraploid", "Stranglata", "Tibetan_semi_wild", "Wild_emmer") 14 | A <- read.table("/Users/guoyafei/Documents/01_个人项目/04_VmapIII/12_Vmap3Test/07_Test0720/depth_statics/IBS/1A.IBS.txt",header=T,stringsAsFactors=F) 15 | #dim(A) 16 | #1355 1356 17 | A$Lineage <- NA 18 | rownames(A) <- A$Dxy 19 | #给每一行赋subspecies的值 20 | 21 | for(i in 1:length(data)){ 22 | A[which(A$Dxy %in% data[[i]][,1]),1357] <- spName[i] 23 | } 24 | 25 | #提取CS的两列 26 | subA <- cbind(A[,which(colnames(A) %in% data[[1]][,1])],A[,1357]) 27 | subA$lineage <- "A" 28 | colnames(subA) <- c("IBS1","IBS2","subspecies","lineage") 29 | #B_IBS---- 30 | #读取ibs文件,并且计算每个subspecies与CS的IBS平均值。 31 | spName <- c("CS", "Cultivar", "Domesticated_emmer", "Free_threshing_tetraploid", "Landrace", "Other_hexaploid", "Other_tetraploid", "Stranglata", "Tibetan_semi_wild", "Wild_emmer") 32 | B <- read.table("/Users/guoyafei/Documents/01_个人项目/04_VmapIII/12_Vmap3Test/07_Test0720/depth_statics/IBS/1B.IBS.txt",header=T,stringsAsFactors=F) 33 | #dim(B) 34 | #1355 1356 35 | B$Lineage <- NA 36 | rownames(B) <- B$Dxy 37 | #给每一行赋subspecies的值 38 | for(i in 1:length(data)){ 39 | B[which(B$Dxy %in% data[[i]][,1]),1357] <- spName[i] 40 | } 41 | #提取CS的两列 42 | subB <- cbind(B[,which(colnames(B) %in% data[[1]][,1])],B[,1357]) 43 | subB$lineage <- "B" 44 | colnames(subB) <- c("IBS1","IBS2","subspecies","lineage") 45 | #D_IBS---- 46 | #读取ibs文件,并且计算每个subspecies与CS的IBS平均值。 47 | spName <- c("CS", "Cultivar", "Domesticated_emmer", "Free_threshing_tetraploid", "Landrace", "Other_hexaploid", "Other_tetraploid", "Stranglata", "Tibetan_semi_wild", "Wild_emmer") 48 | D <- read.table("/Users/guoyafei/Documents/01_个人项目/04_VmapIII/12_Vmap3Test/07_Test0720/depth_statics/IBS/1D.IBS.txt",header=T,stringsAsFactors=F) 49 | #dim(D) 50 | #1334 1336 51 | D$Lineage <- NA 52 | rownames(D) <- D$Dxy 53 | #给每一行赋subspecies的值 54 | for(i in 1:length(data)){ 55 | D[which(D$Dxy %in% 
data[[i]][,1]),1337] <- spName[i] 56 | } 57 | #提取CS的两列 58 | subD <- cbind(D[,which(colnames(D) %in% data[[1]][,1])],D[,1337]) 59 | subD$lineage <- "D" 60 | colnames(subD) <- c("IBS1","IBS2","subspecies","lineage") 61 | 62 | #画每组比较的boxplot图 63 | all <- rbind(subA,subB,subD) 64 | p <- ggplot(all, aes(subspecies, IBS1)) + 65 | geom_boxplot(outlier.shape = NA, outlier.colour = NA, aes(fill=factor(lineage))) + 66 | scale_fill_brewer(palette = "Accent") + 67 | guides(fill=guide_legend(titlec = NULL)) + 68 | scale_x_discrete(limits = c("Wild_emmer","Domesticated_emmer", "Free_threshing_tetraploid", "Other_tetraploid", "Landrace", "Cultivar", "Other_hexaploid", "Tibetan_semi_wild", "Stranglata"))+ 69 | ylab("PI") + 70 | xlab("Lineages") + 71 | scale_y_continuous(limits=c(0,0.25)) + 72 | theme_classic() + 73 | theme(axis.text.x = element_text(size=12, angle = 30, hjust = 0.5, vjust = 0.5), axis.text.y = element_text(size=15)) 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /02_Vmap3R/06_Map2.r: -------------------------------------------------------------------------------- 1 | #library(ggplot2) 2 | library(ggmap) 3 | #library(sp) 4 | #library(maptools) 5 | #library(maps) 6 | #library(psych) 7 | taxa <- read.table("/Users/guoyafei/Documents/01_个人项目/04_VmapIII/12_Vmap3Test/05_VcfCheck/taxa_location.txt", header=T, stringsAsFactors = F) 8 | taxa_AB <- taxa[which(taxa$Ploidy=="AABB"),] 9 | taxa_ABD <- taxa[which(taxa$Ploidy=="AABBDD"),] 10 | taxa_D <- taxa[which(taxa$Ploidy=="DD"),] 11 | 12 | #绘图 13 | mp<-NULL 14 | mapworld<-borders("world",colour = "gray70",fill="white") 15 | mp<-ggplot()+mapworld+ylim(-90,90)+theme_classic() 16 | 17 | #AABBDD---- 18 | mp2<-mp+geom_point(aes(x=taxa_ABD$Longitude, y=taxa_ABD$Latitude, color=taxa_ABD$TreeValidatedGroupbySubspecies),size=1.5)+scale_size(range=c(1,1))+ theme_classic()+ 19 | theme(axis.text.x = element_text(size = 20),axis.title.x = element_text(size = 20),axis.text.y = element_text(size = 20),axis.title.y = element_text(size = 20))+ 20 | #scale_fill_manual(values=c("#97FFFF", "#FFD700", "#FF6347", "#8470FF","#D8BFD8"), 21 | # breaks=c("AA", "SS", "DD", "AABB","AABBDD"), 22 | # labels=c("AA", "SS", "DD", "AABB","AABBDD"))+ 23 | theme(axis.text = element_blank()) + ## 删去所有刻度标签 24 | theme(axis.ticks = element_blank()) + 25 | theme(panel.grid =element_blank()) + 26 | theme(axis.ticks.y = element_blank()) 27 | 28 | #AABB---- 29 | mp2<-mp+geom_point(aes(x=taxa_AB$Longitude, y=taxa_AB$Latitude, color=taxa_AB$TreeValidatedGroupbySubspecies),size=1.5)+scale_size(range=c(1,1))+ theme_classic()+ 30 | theme(axis.text.x = element_text(size = 20),axis.title.x = element_text(size = 20),axis.text.y = element_text(size = 20),axis.title.y = element_text(size = 20))+ 31 | #scale_fill_manual(values=c("#97FFFF", "#FFD700", "#FF6347", "#8470FF","#D8BFD8"), 32 | # breaks=c("AA", "SS", "DD", "AABB","AABBDD"), 33 | # labels=c("AA", "SS", "DD", "AABB","AABBDD"))+ 34 | theme(axis.text = element_blank()) + ## 删去所有刻度标签 35 | theme(axis.ticks = element_blank()) + 36 | theme(panel.grid =element_blank()) + 37 | theme(axis.ticks.y = element_blank()) 38 | 39 | #DD---- 40 | mp2<-mp+geom_point(aes(x=taxa_D$Longitude, y=taxa_D$Latitude, color=taxa_D$TreeValidatedGroupbySubspecies),size=1.5)+scale_size(range=c(1,1))+ theme_classic()+ 41 | theme(axis.text.x = element_text(size = 20),axis.title.x = element_text(size = 20),axis.text.y = element_text(size = 20),axis.title.y = element_text(size = 20))+ 42 | #scale_fill_manual(values=c("#97FFFF", 
"#FFD700", "#FF6347", "#8470FF","#D8BFD8"), 43 | # breaks=c("AA", "SS", "DD", "AABB","AABBDD"), 44 | # labels=c("AA", "SS", "DD", "AABB","AABBDD"))+ 45 | theme(axis.text = element_blank()) + ## 删去所有刻度标签 46 | theme(axis.ticks = element_blank()) + 47 | theme(panel.grid =element_blank()) + 48 | theme(axis.ticks.y = element_blank()) 49 | #计算各位点的点的数量并标记---- 50 | pos_count <- read.table("/Users/guoyafei/Documents/01_个人项目/04_VmapIII/12_Vmap3Test/05_VcfCheck/pos_count.txt",header=T,stringsAsFactors = F) 51 | mp2<-mp+geom_point(aes(x=pos_count$Longitude, y=pos_count$Latitude),size=2)+scale_size(range=c(1,1))+ theme_classic()+ 52 | theme(axis.text.x = element_text(size = 20),axis.title.x = element_text(size = 20),axis.text.y = element_text(size = 20),axis.title.y = element_text(size = 20))+ 53 | #scale_fill_manual(values=c("#97FFFF", "#FFD700", "#FF6347", "#8470FF","#D8BFD8"), 54 | # breaks=c("AA", "SS", "DD", "AABB","AABBDD"), 55 | # labels=c("AA", "SS", "DD", "AABB","AABBDD"))+ 56 | theme(axis.text = element_blank()) + ## 删去所有刻度标签 57 | theme(axis.ticks = element_blank()) + 58 | theme(panel.grid =element_blank()) + 59 | theme(axis.ticks.y = element_blank()) 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /02_Vmap3R/07_Nucdiff.sh: -------------------------------------------------------------------------------- 1 | #安装mumer,biopython packages 2 | nucdiff Chinese_Spring_chr1A.fa Jagger_chr1A.fa ./Out my_prefix CS_Jagger 3 | #无结果 4 | #切断query序列,逢N就断开,再比对 5 | nohup cat jagger_chr1A.fa | seqkit fx2tab | cut -f 2 | sed -r 's/n+/\n/gi' | cat -n | seqkit tab2fx | seqkit replace -p "(.+)" -r "Contig{nr}" > jagger_chr1A_contig.fa & 6 | nucdiff Chinese_Spring_chr1A.fa jagger_chr1A_contig.fa ./Out_contig my_prefix CS_Jagger_contig 7 | -------------------------------------------------------------------------------- /02_Vmap3R/09_PCA2.r: -------------------------------------------------------------------------------- 1 | install.packages("gdata") 2 | library(cinaR) 3 | library(ggplot2) 4 | library(RColorBrewer) 5 | library(aplot) 6 | library(grid) 7 | library(gridExtra) 8 | display.brewer.all() 9 | col <- brewer.pal(n = 8, name = "Spectral") 10 | setwd("/Users/guoyafei/Documents/02_VmapIII/04_Statistics/04_Structure") 11 | eigvec<- read.table("/Users/guoyafei/Documents/02_VmapIII/04_Statistics/04_Structure/ABlineage.maf0.05.5k.eigenvec",header=T,stringsAsFactors = F) 12 | eigval <- read.table("/Users/guoyafei/Documents/02_VmapIII/04_Statistics/04_Structure/ABlineage.maf0.05.5k.eigenval",header=F,stringsAsFactors = F) 13 | mds <- read.table("/Users/guoyafei/Documents/02_VmapIII/04_Statistics/04_Structure/ABlineage.maf0.05.60k.mds",header=T,stringsAsFactors = F) 14 | popinfo <- "/Users/guoyafei/Documents/02_VmapIII/01_表格/V.2/Lulab_germplasm_Info.xlsx" 15 | poptable <- read.xls(popinfo, sheet=1, verbose=FALSE, fill=NA) 16 | popAB <- poptable[which(poptable$GenomeNAtype!="DD"),c(1,2,3,12,13,14,18,20)] 17 | colnames(popAB)[1] <- "FID" 18 | all <- merge(mds,popAB,by="FID") 19 | #PC1:19.03% PC2:11.18% 20 | #PC1:19.03% PC2:11.18% 21 | #colnames(popAB) <- c("order","exp_id","vcf_id","group","color","pch","group2") 22 | #source("/Users/guoyafei/Documents/01_个人项目/04_VmapIII/16_分群/pca.plot2d.r") 23 | #pca_plot(eigenvector = eigvec, eigenvalue = eigval, 24 | # group = popinfo, key = key, outdir = od, 25 | # shape = T, shapes = pop$group2, border = T, border_size = 2.5, 26 | # line0 = T, line0_size = 1) 27 | 
p<-ggplot(all,aes(x=C1,y=C2,shape=as.factor(all$PCA_shape),color=as.factor(all$PCA_color))) 28 | p<-p+geom_point()+ 29 | scale_color_brewer(palette = "Accent")+ 30 | scale_shape_manual(values = c(1:11))+ 31 | theme_classic()+ 32 | theme(panel.grid = element_blank(), panel.background = element_rect(color = 'black', fill = 'transparent'), plot.title = element_text(hjust = 0.5), legend.key = element_rect(fill = 'transparent')) + 33 | labs(x = 'PCA 1 (19.03%)', y = 'PCA 2 (11.18%)') + 34 | theme_bw() + 35 | theme( 36 | #legend.position="none", 37 | #panel.border = element_blank(), 38 | axis.line.y = element_line(), 39 | axis.line.x = element_line(), 40 | panel.grid.major.x = element_blank(), 41 | panel.grid.minor.x = element_blank(), 42 | axis.text.x=element_text(size=15), 43 | axis.text.y=element_text(size=15), 44 | axis.title.y=element_text(size = 15), 45 | axis.title.x=element_text(size = 15), 46 | ) 47 | site <- read.table("/Users/guoyafei/Documents/02_VmapIII/04_Statistics/04_Structure/AB.maf0.05_siteQCfile.txt",header=T,stringsAsFactors = F) 48 | taxa <- read.table("/Users/guoyafei/Documents/02_VmapIII/04_Statistics/04_Structure/AB.maf0.05_taxaQCfile.txt",header=T,stringsAsFactors = F) 49 | p0 <- ggplot(data=site,aes(x=MissingRate)) + 50 | geom_histogram(binwidth=0.01,fill="#69b3a2", 51 | color="#e9ecef", alpha=0.9)+ 52 | theme_bw()+ 53 | labs(x="MissingRate",y="SNP Count")+ 54 | xlim(0,0.2)+ 55 | theme( 56 | #legend.position="none", 57 | #panel.border = element_blank(), 58 | axis.line.y = element_line(), 59 | axis.line.x = element_line(), 60 | panel.grid.major.x = element_blank(), 61 | panel.grid.minor.x = element_blank(), 62 | axis.text.x=element_text(size=15), 63 | axis.text.y=element_text(size=15), 64 | axis.title.y=element_text(size = 15), 65 | axis.title.x=element_text(size = 15), 66 | 67 | ) 68 | p0 69 | -------------------------------------------------------------------------------- /02_Vmap3R/10_Maf-Dis.r: -------------------------------------------------------------------------------- 1 | library(ggplot2) 2 | setwd("/Users/guoyafei/Documents/01_个人项目/04_VmapIII/12_Test_chr001") 3 | data <- read.table("R2-Maf-Dis.txt",header=T,stringsAsFactors = F) 4 | library(reshape) 5 | data$R_bin <- NA 6 | data$Maf_bin <- NA 7 | data$Dis_bin <- NA 8 | 9 | data[which(data$R.2 >=0 & data$R.2 < 0.5),4] <- "0<=r2<0.5" 10 | data[which(data$R.2 >=0.5 & data$R.2 < 0.75),4] <- "0.5<=r2<0.75" 11 | data[which(data$R.2 >=0.75 & data$R.2 < 1),4] <- "0.75<=r2<1" 12 | data[which(data$R.2 == 1),4] <- "r2=1" 13 | 14 | data[which(data$SNP_A_MAF >=0 & data$SNP_A_MAF < 0.05),5] <- "0-0.05" 15 | data[which(data$SNP_A_MAF >=0.05 & data$SNP_A_MAF < 0.1),5] <- "0.05-0.1" 16 | data[which(data$SNP_A_MAF >=0.1 & data$SNP_A_MAF < 0.15),5] <- "0.1-0.15" 17 | data[which(data$SNP_A_MAF >=0.15 & data$SNP_A_MAF < 0.2),5] <- "0.15-0.2" 18 | data[which(data$SNP_A_MAF >=0.2 & data$SNP_A_MAF < 0.25),5] <- "0.2-0.25" 19 | data[which(data$SNP_A_MAF >=0.25 & data$SNP_A_MAF < 0.3),5] <- "0.25-0.3" 20 | data[which(data$SNP_A_MAF >=0.3 & data$SNP_A_MAF < 0.35),5] <- "0.3-0.35" 21 | data[which(data$SNP_A_MAF >=0.35 & data$SNP_A_MAF < 0.4),5] <- "0.35-0.4" 22 | data[which(data$SNP_A_MAF >=0.4 & data$SNP_A_MAF < 0.45),5] <- "0.4-0.45" 23 | data[which(data$SNP_A_MAF >=0.45 & data$SNP_A_MAF <= 0.5),5] <- "0.45-0.5" 24 | 25 | data[which(data$Distance >=0 & data$Distance < 10),6] <- "0-10" 26 | data[which(data$Distance >=10 & data$Distance < 20),6] <- "10-20" 27 | data[which(data$Distance >=20 & data$Distance < 30),6] <- "20-30" 28 | 
data[which(data$Distance >=30 & data$Distance < 40),6] <- "30-40" 29 | data[which(data$Distance >=40 & data$Distance < 50),6] <- "40-50" 30 | data[which(data$Distance >=50 & data$Distance < 60),6] <- "50-60" 31 | data[which(data$Distance >=60 & data$Distance < 70),6] <- "60-70" 32 | data[which(data$Distance >=70 & data$Distance < 80),6] <- "70-80" 33 | data[which(data$Distance >=80 & data$Distance < 90),6] <- "80-90" 34 | data[which(data$Distance >=90 & data$Distance <= 100),6] <- "90-100" 35 | 36 | R <- data[,c(4,5)] 37 | R$num <- 1 38 | md <- melt(R,id=(c("Maf_bin","R_bin"))) 39 | md2 <- rbind(bin1[1:2000,],bin2[1:2000,],bin3[1:2000,],bin4[1:2000,],bin5[1:2000,],bin6[1:2000,],bin7,bin8,bin9) 40 | newdata <- cast(md2,Maf_bin+R_bin~value) 41 | colnames(newdata)[3] <- "num" 42 | 43 | 44 | ggplot(data = newdata, mapping = aes(x = factor(Maf_bin), y = newdata$num, fill = newdata$R_bin)) + geom_bar(stat = 'identity', position = 'fill')+ 45 | theme_classic() + 46 | labs(x = "Maf", y = "Propotion of SNPs(%)", title = "Allele frequency and R2 for the 10M regions with same snp numbers") + 47 | theme(legend.text = element_text(size = 14),legend.title=element_blank(),plot.title = element_text(hjust = 0.5,size=20),axis.title = element_text(size = 20),axis.text.x = element_text(size = 15,vjust = 0.5, hjust = 0.5,angle = 45),axis.text.y = element_text(size = 15)) 48 | 49 | 50 | D <- data[!is.na(data$Dis_bin),c(5,6)] 51 | D$num <- 1 52 | md <- melt(D,id=(c("Maf_bin","Dis_bin"))) 53 | newdata <- cast(md,Maf_bin+Dis_bin~value) 54 | colnames(newdata)[3] <- "num" 55 | 56 | ggplot(data = newdata, mapping = aes(x = factor(Maf_bin), y = newdata$num, fill = newdata$Dis_bin)) + geom_bar(stat = 'identity', position = 'fill')+ 57 | theme_classic() + 58 | labs(x = "Maf", y = "Propotion of SNPs(%)", title = "Allele frequency and Distance of SNPs for the 10M regions") + 59 | theme(legend.text = element_text(size = 14),legend.title=element_blank(),plot.title = element_text(hjust = 0.5,size=20),axis.title = element_text(size = 20),axis.text.x = element_text(size = 15,vjust = 0.5, hjust = 0.5,angle = 45),axis.text.y = element_text(size = 15)) 60 | 61 | 62 | -------------------------------------------------------------------------------- /02_Vmap3R/11_Statistic.sh: -------------------------------------------------------------------------------- 1 | #提取样本 2 | for chr in {05,06,11,12,17,18,23,24,29,30,35,36,41,42} 3 | do 4 | vcftools --gzvcf /data2/yafei/004_Vmap3/VCF/DD_VCF/chr0${chr}.vcf.gz --max-alleles 4 --keep /data2/yafei/004_Vmap3/VCF/taxa_DD.txt --recode --recode-INFO-all --stdout | bgzip -c > /data2/yafei/004_Vmap3/VCF/finalDD_vcf/chr0${chr}.vcf.gz & 5 | done b 6 | 7 | #过滤mac以及多等位基因-AABB 8 | for chr in {01,02,03,04,07,08,09,10,13,14,15,16,19,20,21,22,25,26,27,28,31,32,33,34,37,38,39,40} 9 | do 10 | vcftools --gzvcf /data2/yafei/004_Vmap3/VCF/AABB_VCF/chr0${chr}.vcf.gz --min-alleles 2 --max-alleles 2 --mac 2 --recode --recode-INFO-all --stdout | bgzip -c > /data2/yafei/004_Vmap3/VCF/chr0${chr}.mac.bialle.vcf.gz & 11 | done 12 | #过滤mac以及多等位基因-DD 13 | for chr in {05,06,11,12,17,18,23,24,29,30,35,36,41,42} 14 | do 15 | vcftools --gzvcf /data2/yafei/004_Vmap3/VCF/finalDD_vcf/chr0${chr}.vcf.gz --min-alleles 2 --max-alleles 2 --mac 2 --recode --recode-INFO-all --stdout | bgzip -c > /data2/yafei/004_Vmap3/VCF/finalDD_vcf/chr0${chr}.mac.bialle.vcf.gz 16 | done 17 | #统计-AABB 18 | java -jar /data2/yafei/004_Vmap3/VCF/CheckVcf.jar /data2/yafei/004_Vmap3/VCF/DD_VCF /data2/yafei/004_Vmap3/VCF/DD_VCF/checkVcf.txt 19 | 20 | 
#拆分VCF为INdel和SNP-AABB 21 | for chr in {01,02,03,04,07,08,09,10,13,14,15,16,19,20,21,22,25,26,27,28,31,32,33,34,37,38,39,40} 22 | do 23 | vcftools --gzvcf /data2/yafei/004_Vmap3/VCF/filt_AABB/chr0${chr}.mac.bialle.vcf.gz --remove-indels --recode --recode-INFO-all --stdout | bgzip -c > /data2/yafei/004_Vmap3/VCF/Snp/chr0${chr}.snp.vcf.gz & 24 | vcftools --gzvcf /data2/yafei/004_Vmap3/VCF/filt_AABB/chr0${chr}.mac.bialle.vcf.gz --keep-only-indels --recode --recode-INFO-all --stdout | bgzip -c > /data2/yafei/004_Vmap3/VCF/Indel/chr0${chr}.indel.vcf.gz & 25 | done 26 | #拆分VCF为INdel和SNP-DD 27 | for chr in {05,06,11,12,17,18,23,24,29,30,35,36,41,42} 28 | do 29 | vcftools --gzvcf /data2/yafei/004_Vmap3/VCF/finalDD_vcf/chr0${chr}.mac.bialle.vcf.gz --remove-indels --recode --recode-INFO-all --stdout | bgzip -c > /data2/yafei/004_Vmap3/VCF/Snp/chr0${chr}.snp.vcf.gz & 30 | vcftools --gzvcf /data2/yafei/004_Vmap3/VCF/finalDD_vcf/chr0${chr}.mac.bialle.vcf.gz --keep-only-indels --recode --recode-INFO-all --stdout | bgzip -c > /data2/yafei/004_Vmap3/VCF/Indel/chr0${chr}.indel.vcf.gz & 31 | done 32 | 33 | vcftools --gzvcf /data2/yafei/004_Vmap3/VCF/AABB_VCF/chr002.vcf.gz --min-alleles 2 --max-alleles 2 --mac 2 --recode --recode-INFO-all --stdout | bgzip -c > /data2/yafei/004_Vmap3/VCF/chr002.mac.bialle.vcf.gz & 34 | -------------------------------------------------------------------------------- /02_Vmap3R/13_VcfStatistic.r: -------------------------------------------------------------------------------- 1 | #过滤maf提取样本 2 | #AA 3 | for chr in {1,2,7,8,13,14,19,20,25,26,31,32,37,38} 4 | do 5 | vcftools --vcf /data2/yafei/Project3/V+W_E2/E2_all/chr${chr}.all.vcf --keep /data2/yafei/Project3/group/AA_taxa.txt --maf 0.0000001 --recode --stdout | bgzip -c > /data2/yafei/Project3/V+W_E2/E2_maf/AA/chr${chr}.mafAA.vcf.gz 6 | java -jar /data1/home/yafei/C36_checkQuality.jar --file /data2/yafei/Project3/V+W_E2/E2_maf/AA/chr${chr}.mafAA.vcf.gz --out /data2/yafei/Project3/V+W_E2/E2_maf/AA/chr${chr}_siteQCfile.txt --out2 /data2/yafei/Project3/V+W_E2/E2_maf/AA/chr${chr}_taxaQCfile.txt > nohupA 2>& 1 7 | done 8 | #BB 9 | for chr in {3,4,9,10,15,16,21,22,27,28,33,34,39,40} 10 | do 11 | vcftools --gzvcf /data1/home/yafei/Project3/Vmap1.1/chr${chr}.all.vcf.gz --keep /data2/yafei/Project3/group/SS_taxa.txt --max-missing 0.1 --maf 0.0000001 --recode --stdout | bgzip -c > /data1/home/yafei/Project3/Maf/mafBB/chr${chr}.mafBB.vcf.gz 12 | java -jar /data1/home/yafei/C36_checkQuality.jar --file /data1/home/yafei/Project3/Maf/mafBB/chr${chr}.mafBB.vcf.gz --out /data1/home/yafei/Project3/Maf/mafBB/chr${chr}_siteQCfile.txt --out2 /data1/home/yafei/Project3/Maf/mafBB/chr${chr}_taxaQCfile.txt > nohupB 2>& 1 13 | done 14 | #DD 15 | for chr in {5,6,11,12,17,18,23,24,29,30,35,36,41,42} 16 | do 17 | vcftools --vcf /data2/yafei/Project3/V+W_E2/E2_all/chr${chr}.all.vcf --keep /data2/yafei/Project3/group/DD_taxa.txt --maf 0.0000001 --recode --stdout | bgzip -c > /data2/yafei/Project3/V+W_E2/E2_maf/DD/chr${chr}.mafDD.vcf.gz 18 | java -jar /data1/home/yafei/C36_checkQuality.jar --file /data2/yafei/Project3/V+W_E2/E2_maf/DD/chr${chr}.mafDD.vcf.gz --out /data2/yafei/Project3/V+W_E2/E2_maf/DD/chr${chr}_siteQCfile.txt --out2 /data2/yafei/Project3/V+W_E2/E2_maf/DD/chr${chr}_taxaQCfile.txt > nohupD 2>& 1 19 | done 20 | #AABB 21 | for chr in {1,2,7,8,13,14,19,20,25,26,31,32,37,38,3,4,9,10,15,16,21,22,27,28,33,34,39,40} 22 | do 23 | vcftools --vcf /data2/yafei/Project3/V+W_E2/E2_all/chr${chr}.all.vcf --keep /data2/yafei/Project3/group/AABB_taxa.txt --maf 
0.0000001 --recode --stdout | bgzip -c > /data2/yafei/Project3/V+W_E2/E2_maf/AABB/chr${chr}.mafAABB.vcf.gz 24 | java -jar /data1/home/yafei/C36_checkQuality.jar --file /data2/yafei/Project3/V+W_E2/E2_maf/AABB/chr${chr}.mafAABB.vcf.gz --out /data2/yafei/Project3/V+W_E2/E2_maf/AABB/chr${chr}_siteQCfile.txt --out2 /data2/yafei/Project3/V+W_E2/E2_maf/AABB/chr${chr}_taxaQCfile.txt > nohupAB 2>& 1 25 | done 26 | #AABBDD 27 | for chr in {1..42} 28 | do 29 | vcftools --vcf /data2/yafei/Project3/V+W_E2/E2_all/chr${chr}.all.vcf --keep /data2/yafei/Project3/group/AABBDD_taxa.txt --maf 0.0000001 --recode --stdout | bgzip -c > /data2/yafei/Project3/V+W_E2/E2_maf/AABBDD/chr${chr}.mafAABBDD.vcf.gz 30 | java -jar /data1/home/yafei/C36_checkQuality.jar --file /data2/yafei/Project3/V+W_E2/E2_maf/AABBDD/chr${chr}.mafAABBDD.vcf.gz --out /data2/yafei/Project3/V+W_E2/E2_maf/AABBDD/chr${chr}_siteQCfile.txt --out2 /data2/yafei/Project3/V+W_E2/E2_maf/AABBDD/chr${chr}_taxaQCfile.txt > nohupABD 2>& 1 31 | done 32 | #Landrace 33 | for chr in {1..42} 34 | do 35 | vcftools --vcf /data2/yafei/Project3/V+W_E2/E2_all/chr${chr}.all.vcf --keep /data2/yafei/Project3/group/subspecies/sub_Landrace_new.txt --maf 0.0000001 --recode --stdout | bgzip -c > /data2/yafei/Project3/V+W_E2/E2_maf/Landrace/chr${chr}.mafLandrace.vcf.gz 36 | java -jar /data1/home/yafei/C36_checkQuality.jar --file /data2/yafei/Project3/V+W_E2/E2_maf/Landrace/chr${chr}.mafLandrace.vcf.gz --out /data2/yafei/Project3/V+W_E2/E2_maf/Landrace/chr${chr}_siteQCfile.txt --out2 /data2/yafei/Project3/V+W_E2/E2_maf/Landrace/chr${chr}_taxaQCfile.txt > nohupLand 2>& 1 37 | done 38 | #nohup sh merge_maf.sh & 39 | 40 | -------------------------------------------------------------------------------- /02_Vmap3R/14_Snp_Density.sh: -------------------------------------------------------------------------------- 1 | #统计VCF文件的snp的密度 2 | #hg19.bed 3 | chr1 248956422 4 | chr2 242193529 5 | chr3 198295559 6 | .... 
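# A minimal sketch (not part of the original pipeline) of how the two-column genome file shown above can be built, assuming a reference FASTA is available; ref.fa is a placeholder path.
# samtools faidx writes a .fai index whose first two columns are chromosome name and length, which is exactly the format bedtools makewindows -g expects.
samtools faidx ref.fa
cut -f1,2 ref.fa.fai > hg19.bed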
7 | 8 | bedtools makewindows -g hg19.bed -w 1000000 > windows.bed 9 | bedtools coverage -a windows.bed -b test.vcf -counts > coverage.txt 10 | 11 | for i in {1..42} 12 | do 13 | bedtools coverage -a ${i}.bed -b ../chr00${i}.vcf.gz -counts > chr00${i}.coverage.txt 14 | done 15 | 16 | #如果VCF文件过大,gz文件大于20G会报错,解决办法如下: 17 | 18 | for i in {1,3,4,5,7,8,9} 19 | #{1,3,7,9} 20 | do 21 | bedmap --echo --count --delim '\t' ${i}.bed <( gunzip -c ../chr00${i}.vcf.gz | vcf2bed --max-mem=100G --sort-tmpdir="~/large/dir" ) > chr00${i}.coverage.txt 22 | done 23 | 24 | for i in {10,11,13,14,15,16,17,19,20,21,22,23,25,26,27,28,29,31,32,33,34,35,37,38,39,40,41,42} 25 | #{10,13,14,15,16,22,23,25,26,27,28,29,31,32,33,34,35,37,38,39,40,41,42} 26 | do 27 | bedmap --echo --count --delim '\t' ${i}.bed <( gunzip -c ../chr0${i}.vcf.gz | vcf2bed --max-mem=100G --sort-tmpdir="~/large/dir" ) > chr0${i}.coverage.txt 28 | done 29 | 30 | 31 | -------------------------------------------------------------------------------- /03_Analysis/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yafei-Guo-coder/DumpNice/183d85aba75932aeacf7484386eb0cae484c7d9b/03_Analysis/.DS_Store -------------------------------------------------------------------------------- /03_Analysis/01_ChangeFileFormat_1.r: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------转换WW原始文件.R------------------------------------------------------------------------------------------------------------- 2 | #读取文件:203服务器 3 | setwd("/data1/home/yafei/SNP/WW") 4 | all <- read.table("Balfourier_et_al_Wheat_Phylogeography_DataS2.tab",header=T,stringsAsFactors=F) 5 | names <- read.table("741names.txt",header=F,stringsAsFactors=F) 6 | convers <- read.table("tabw280kAlleleConversionTable.tab",header=T,stringsAsFactors=F) 7 | geno <- all[,which(colnames(all) %in% names[,1])] 8 | header <- all[,1:5] 9 | data <- rbind(header,geno) 10 | #write.table(alldata,"741.tab",row.names=F,quote=F,sep="\t") 11 | #data <- read.table("741.tab",header=T,stringsAsFactors=F) 12 | #修改染色体号和snp位点 13 | chr <- paste(rep(paste("chr",1:7,sep=""),each=3),rep(c("A","B","D"),times=7), sep = "") 14 | pos <- c(471304005,438720154,452179604, 462376173, 453218924,462216879,454103970,448155269,476235359,452555092,451014251,451004620,453230519,451372872,451901030,452440856,452077197,450509124,450046986,453822637,453812268) 15 | for(i in 1:21){ 16 | data[which(data$chromosome == chr[i]),3] <- i*2-1 17 | data[which(data$chromosome == i*2-1 & data$snpPosition > pos[i]),3] <- i*2 18 | data[which(data$chromosome == i*2 & data$snpPosition > pos[i]),4] <- data[which(data$chromosome == i*2),4] - pos[i] 19 | orderd <- data[order(data[,3], data[,4]), ] 20 | } 21 | sub <- merge(data[,c(1:4)], convers,by.x="psId",by.y="probesetId",all.x=TRUE) 22 | sub_orderd <- sub[order(sub[,3], sub[,4]), ] 23 | #写42个(.convers)文件 24 | varName <- paste("chr",1:42,".convers",sep="") 25 | for(i in 1:42){ 26 | write.table(sub_orderd[which(sub_orderd$chromosome == i),], varName[i],row.names=F,quote=F,sep="\t") 27 | } 28 | 29 | #写42个(.tab)文件 30 | varName <- paste("chr",1:42,".tab",sep="") 31 | for(i in 1:42){ 32 | write.table(orderd[which(orderd$chromosome == i),], varName[i],row.names=F,quote=F,sep="\t") 33 | } 34 | #写42个(.bed)文件 35 | varName <- paste("chr",1:42,".bed",sep="") 36 | for(i in 1:42){ 37 | var <- cbind(orderd[which(orderd$chromosome == 
i),3],orderd[which(orderd$chromosome == i),4],orderd[which(orderd$chromosome == i),4]+1) 38 | write.table(var, varName[i],row.names=F,col.names=F,quote=F,sep="\t") 39 | } 40 | -------------------------------------------------------------------------------- /03_Analysis/10_IceData.r: -------------------------------------------------------------------------------- 1 | #冰川数据 2 | setwd("/Users/guoyafei/Downloads/stack/") 3 | data <- read.table("LR04stack.txt",header=T,stringsAsFactors = F,sep="\t") 4 | ggplot(data, aes(x=data$Time..ka., y=data$Benthic.d18O..per.mil.)) + 5 | geom_line() + 6 | scale_x_log10()+scale_y_reverse() + 7 | geom_hline(aes(yintercept=4), colour="#990000", linetype="dashed") -------------------------------------------------------------------------------- /03_Analysis/11_ReadDepth.sh: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------------------计算reads depth.sh-------------------------------------------------------------------------------------- 2 | #工作目录:204:/data2/xuebo/Projects/Speciation/More_accessions/hapScan/ 3 | #位点depth 4 | samtools depth -b bed_file sample.bam > sample.depth #bed 用来指定统计区间,运行后输出指定区间每一个碱基的测序深度。 5 | #区间depth 6 | bedtools makewindows -g genome.txt -w 10000000 -s 2000000 > windows.bed 7 | #bedtools makewindows用来自动生成划窗区间。-g genome.txt是要划分的基因组,格式为两列:染色体、染色体长度;-w 10000000为窗口大小为10M;-s 1000000为步长为1M,即窗口在染色体上每次向右平移1M的距离;windows.bed为输出文件,格式为三列:染色体、区间开始位点、区间结束位点。 8 | bedtools coverage -a windows.bed -b xxx.sort.bam > xxx.depth.txt 9 | #bedtools coverage对划分好的每个滑动窗口进行reads数(depth)的统计。-a windows为上一步划分好的区间;-b xxx.sort.bam为测序数据mapping到参考基因组的比对文件;xxx.depth.txt为统计结果的输出文件,格式为7列:染色体、区间起始位点、区间结束位点、该区间内的reads数、该区间内的碱基数、区间大小、该区间的平均覆盖度。 10 | samtools bedcov bed_file samplename.bam > sample.bedcov 11 | #输出的文件中计算了bed文件每一个区间的碱基总数,这里并不是reads的条数 12 | -------------------------------------------------------------------------------- /03_Analysis/12_BWA.sh: -------------------------------------------------------------------------------- 1 | ####################################################################### bwa ########################################################################## 2 | 3 | zcat file.fq.gz | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > file_sorted.fastq 4 | 5 | for ID in {"CRR061683","CRR061684"} 6 | do 7 | ref=/data1/home/xinyue/ref/abd_iwgscV1.fa.gz 8 | in1=/data1/home/xinyue/data/cleandata/${ID}_f1_paired.fq.gz 9 | in2=/data1/home/xinyue/data/cleandata/${ID}_r2_paired.fq.gz 10 | in="$in1 $in2" 11 | out=/data1/home/xinyue/data/bamdata 12 | echo -e "#!/bin/bash \n 13 | bwa mem -t 32 -R '@RG\tID:$ID\tSM:$ID\tLB:SL\tPL:IL' $ref $in | samtools view -S -b - > $out/$ID.bam " > NGS_bwa.$ID.sh 14 | sh NGS_bwa.$ID.sh 15 | done 16 | ## sort 17 | samtools sort -@ 2 -O bam -o $out/$ID.sorted.bam $out/$ID.bam 18 | ## MarkDuplicates and index 19 | java -jar /data1/home/xuebo/software/picard.jar MarkDuplicates INPUT=$out/$ID.sorted.bam OUTPUT=$out/$ID.rm.bam METRICS_FILE=$out/$ID.metrics.txt 20 | ## index 21 | samtools index $out/$ID.rm.bam 22 | 23 | -------------------------------------------------------------------------------- /04_GWAS/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yafei-Guo-coder/DumpNice/183d85aba75932aeacf7484386eb0cae484c7d9b/04_GWAS/.DS_Store -------------------------------------------------------------------------------- /04_GWAS/03_Gene_Domain.r: 
-------------------------------------------------------------------------------- 1 | ###画这个基因的各个部分的分类图 2 | a = 449254000 3 | b = 449260000 4 | plot(NULL, xlim=c(a, b), ylim=c(0, 5), main="",axes=FALSE, xlab="", ylab="", xaxs="i", yaxs="i") 5 | polygon(c(a, a, b,b), c(2.7,3,3,2.7),col="grey", border="grey") 6 | rect(449254955,2.7,449255062,3,col="#00868B",border = NA) 7 | rect(449255063,2.7,449255198,3,col="#00FFFF",border = NA) 8 | rect(449255307,2.7,449255709,3,col="#00FFFF",border = NA) 9 | rect(449256551,2.7,449257487,3,col="#00FFFF",border = NA) 10 | rect(449259163,2.7,449259241,3,col="#00868B",border = NA) 11 | 12 | -------------------------------------------------------------------------------- /04_GWAS/04_Gene_trait.r: -------------------------------------------------------------------------------- 1 | setwd("/Users/guoyafei/Documents/个人项目/傅老师/20210311/GWAS_sign_gene/") 2 | data <- read.table("VariantComponent.txt",header=T,stringsAsFactors = F) 3 | data$X1.428374375 <- as.factor(data$X1.428374375) 4 | data$X3.106681694 <- as.factor(data$X3.106681694) 5 | data$X3.107363563 <- as.factor(data$X3.107363563) 6 | data$X16.83205051 <- as.factor(data$X16.83205051) 7 | pdf("SNP_geno_trait.pdf") 8 | ggplot(data, aes(x = X1.428374375, y = Weight))+ 9 | geom_boxplot(position=position_dodge(0.5),width=0.6) + geom_point() +theme_classic() + labs(x="NGR5_1(1-428374375)") + theme(axis.title.x = element_text(size=15), axis.title.y = element_text(size=15),axis.text.x = element_text(size=15), axis.text.y = element_text(size=15)) 10 | ggplot(data, aes(x = X3.106681694, y = Weight))+ 11 | geom_boxplot(position=position_dodge(0.5),width=0.6) + geom_point() +theme_classic() + labs(x="PIF_2(3-106681694)") + theme(axis.title.x = element_text(size=15), axis.title.y = element_text(size=15),axis.text.x = element_text(size=15), axis.text.y = element_text(size=15)) 12 | #ggplot(data, aes(x = X3.107363563, y = Weight))+ 13 | # geom_boxplot(position=position_dodge(0.5),width=0.6) + geom_point() +theme_classic() + labs(x="PIF_2(3-107363563)") + theme(axis.title.x = element_text(size=15), axis.title.y = element_text(size=15),axis.text.x = element_text(size=15), axis.text.y = element_text(size=15)) 14 | ggplot(data, aes(x = X16.83205051, y = Weight))+ 15 | geom_boxplot(position=position_dodge(0.5),width=0.6) + geom_point() +theme_classic() + labs(x="GA2ox3-B1(16-83205051)") + theme(axis.title.x = element_text(size=15), axis.title.y = element_text(size=15),axis.text.x = element_text(size=15), axis.text.y = element_text(size=15)) 16 | dev.off() 17 | -------------------------------------------------------------------------------- /04_GWAS/08_Correlation.r: -------------------------------------------------------------------------------- 1 | library(gggplot2) 2 | library("ggpubr") 3 | 4 | data <- read.table("/Users/guoyafei/Desktop/相关性.txt", header=T, stringsAsFactors = F) 5 | sub <- data[!is.na(data$株高.郑老师),] 6 | sub2 <- sub[!is.na(sub$气孔导度),] 7 | ggscatter(data, x = "株高.郑老师", y = "气孔导度", 8 | add = "reg.line", conf.int = TRUE, 9 | cor.coef = TRUE, cor.method = "pearson", 10 | xlab = "Plant height", ylab = "Stomatal conductance") 11 | ggscatter(data, x = "ZX株高", y = "气孔导度", color = "orgCty", 12 | add = "reg.line", conf.int = TRUE, 13 | cor.coef = TRUE, cor.method = "pearson", 14 | xlab = "Tiller", ylab = "Stomatal conductance") 15 | ggplot(data=data, aes(x=ZX株高, y=气孔导度 ))+ 16 | geom_point(size=3,aes(x=ZX株高, y=气孔导度,color=orgCty ))+ 17 | stat_smooth(method="lm")+ 18 | xlab("plant height")+ 19 | ylab("Stomatal 
conductance")+ 20 | theme_classic() 21 | 22 | ggplot(data,aes(x = ZX株高,color=orgCty)) + 23 | geom_histogram(aes(x = ZX株高),stat="bin",binwidth=1, boundary = 0)+ 24 | theme_classic()+ 25 | xlab("plant height") 26 | 27 | geom_text(aes(label=as.character(round(..density..,2))),stat="bin",binwidth=0.01,boundary = 0,vjust=-0.5) 28 | 29 | 30 | -------------------------------------------------------------------------------- /05_Plot/Plot_Density.r: -------------------------------------------------------------------------------- 1 | install.packages('RIdeogram') 2 | 3 | require(RIdeogram) 4 | setwd("/Users/guoyafei/Documents/01_个人项目/04_VmapIII/12_Vmap3Test/06_Test/") 5 | gene_density <- read.table("SNP_density.txt", header=T, stringsAsFactors = F) 6 | wheat_karyotype <- read.table("Centrome.txt", header=T, stringsAsFactors = F) 7 | ideogram(karyotype = wheat_karyotype, overlaid = gene_density) 8 | convertSVG("chromosome.svg", device = "png") 9 | -------------------------------------------------------------------------------- /05_Plot/Plot_Fst.r: -------------------------------------------------------------------------------- 1 | #画每组比较的boxplot图 2 | library(stringr) 3 | path <- "/data2/yafei/Project3/FST_group/VmapData/A/boxplot" 4 | name <- c("5_10", "5_11", "5_12", "5_13", "5_14", "5_15", "5_16", "5_17", "5_18", "5_19", "5_20", "5_21", "5_22", "5_23", "5_24", "5_6", "5_7", "5_8", "5_9", "6_10", "6_11", "6_12", "6_13", "6_14", "6_15", "6_16", "6_17", "6_18", "6_19", "6_20", "6_21", "6_22", "6_23", "6_24", "6_7", "6_8", "6_9") 5 | fileNames <- dir(path) 6 | filePath <- sapply(fileNames, function(x){ 7 | paste(path,x,sep='/')}) 8 | data <- lapply(filePath, function(x){ 9 | read.table(x, header=T,stringsAsFactors=F)}) 10 | length(data) 11 | for(i in 1:length(data)){ 12 | data[[i]]$Name <- name[i] 13 | data[[i]]$Before <- str_split_fixed(data[[i]]$Name, "_", 2)[1] 14 | data[[i]]$After <- sapply(strsplit(as.character(data[[i]]$Name),'_'), "[", 2) 15 | } 16 | 17 | all <- data.frame() 18 | for(i in 1:length(data)){ 19 | all <- rbind(all, data[[i]]) 20 | } 21 | 22 | library(ggplot2) 23 | pdf(file = "FST_box_weight.pdf",width=200,height=100) #结果保存 24 | ggplot(all, aes(After, WEIGHTED_FST, fill=factor(Before))) + geom_boxplot() +facet_grid(.~After, scales = "free_x")+ 25 | scale_x_discrete() 26 | dev.off() 27 | pdf(file = "FST_box_mean.pdf",width=200,height=100) #结果保存 28 | ggplot(all, aes(After, MEAN_FST, fill=factor(Before))) + geom_boxplot() +facet_grid(.~After, scales = "free_x")+ 29 | scale_x_discrete() 30 | dev.off() 31 | 32 | #计算位点FST 33 | #Alineage 34 | #pop5-24: 35 | 36 | thread_num=15 37 | tempfifo="my_temp_fifo" 38 | mkfifo ${tempfifo} 39 | exec 6<>${tempfifo} 40 | rm -f ${tempfifo} 41 | for ((i=1;i<=${thread_num};i++)) 42 | do 43 | { 44 | echo 45 | } 46 | done >&6 47 | 48 | for i in {14,15,16,17,18,19,20,21,22,23} 49 | do 50 | for((j=i+1;j<=24;j++)) 51 | do 52 | { 53 | read -u6 54 | { 55 | vcftools --gzvcf /data1/home/yafei/Project3/Vmap1.1/Alineage.vcf.gz --weir-fst-pop /data2/yafei/Project3/FST_group/subgroups/"pop"$i".txt" --weir-fst-pop /data2/yafei/Project3/FST_group/subgroups/"pop"$j".txt" --out /data2/yafei/Project3/FST_group/VmapData/A/site_fst/"p_"$i"_"$j >> Anohup$i 2>& 1 56 | echo "" >&6 57 | } & 58 | } 59 | done 60 | done 61 | wait 62 | 63 | exec 6>&- 64 | 65 | #Blineage 66 | #pop5-24: 67 | 68 | for i in {14,15,16,17,18,19,20,21,22,23} 69 | do 70 | for((j=i+1;j<=24;j++)) 71 | do 72 | vcftools --gzvcf /data1/home/yafei/Project3/Vmap1.1/Alineage.vcf.gz --weir-fst-pop 
/data2/yafei/Project3/FST_group/subgroups/"pop"$i".txt" --weir-fst-pop /data2/yafei/Project3/FST_group/subgroups/"pop"$j".txt" --out /data2/yafei/Project3/FST_group/VmapData/A/site_fst/"p_"$i"_"$j >> Anohup$i 2>& 1 & 73 | done 74 | done 75 | 76 | #提取D site FST top 5%的位点 77 | for i in `ls *.weir.fst` 78 | do 79 | sed '/-nan/d' $i | awk '{if ($3 < 0 ) {$3=0;print $0} else{print $0}}' | sort -k3 -gr > $i.txt & 80 | done 81 | #计算每个文件的行数,并计算%5是多少行,重定向到*pos文件中 82 | for i in `ls *txt` 83 | do 84 | wc -l $i 85 | done 86 | 87 | #提取fst大于0.2的位点 88 | for i in `ls *txt` 89 | do 90 | awk '{if($3 > 0.2) {print $0} else {exit}}' $i > $i.pos2 & 91 | done 92 | 93 | #粘到Excel里 94 | 95 | cat *pos2 | awk '{print $1"\t"$2}' | sort | uniq |awk '{print $1"\t"$2-1"\t"$2}' |sort -k1,1n -k2,2n > D_highfst.bed 96 | 97 | awk '{if($1 == 10) print $2"\t"$3-1"\t"$3}' count.0.2thresh |sort -k1,1n -k2,2n > my.bed 98 | 99 | #提取 100 | yafei@203:/data2/yafei/Project3/FST_group/VmapData/D/site_fst/D_highfst.bed 101 | 102 | 103 | for chr in {1,2,7,8,13,14,19,20,25,26,31,32,37,38} 104 | for chr in {5,6,11,12,17,18,23,24,29,30,35,36,41,42} 105 | do 106 | bedtools intersect -a /data1/home/yafei/Project3/Vmap1.1/chr${chr}.all.vcf.gz -b /data2/yafei/Project3/FST_group/VmapData/D/site_fst/my/myD.bed -header > chr${chr}.D.vcf & 107 | done -------------------------------------------------------------------------------- /05_Plot/Plot_IBS.r: -------------------------------------------------------------------------------- 1 | setwd("/Users/guoyafei/Documents/Lulab/Project-2-Migration/基本统计/IBS/") 2 | data <- read.table("126Landrace_barley.txt",header=T,stringsAsFactors = F) 3 | library(ggplot2) 4 | library(ggmap) 5 | library(sp) 6 | library(maptools) 7 | library(maps) 8 | library(psych) 9 | visit.x<-data$Logititude 10 | visit.y<-data$Latitude 11 | mp <- NULL 12 | mapworld <- borders("world",colour = "gray50",fill="white") 13 | mp <- ggplot()+mapworld+ylim(-60,90) 14 | 15 | mp2 <- mp+geom_point(aes(x=data$Logititude,y=data$Latitude,color=data$wild_A),size=2)+scale_size(range=c(1,1))+ scale_colour_gradient(low = "lightblue",high = "darkblue")+ 16 | theme(legend.title=element_blank(),axis.text.x = element_text(size = 25), axis.title.x = element_text(size = 25),axis.text.y = element_text(size = 25),axis.title.y = element_text(size = 25)) 17 | mp2 <- mp+geom_point(aes(x=data$Logititude,y=data$Latitude,color=data$dome_A),size=2)+scale_size(range=c(1,1))+ scale_colour_gradient(low = "lightblue",high = "darkblue")+ 18 | theme(legend.title=element_blank(),axis.text.x = element_text(size = 25), axis.title.x = element_text(size = 25),axis.text.y = element_text(size = 25),axis.title.y = element_text(size = 25)) 19 | mp2 <- mp+geom_point(aes(x=data$Logititude,y=data$Latitude,color=data$free_A),size=2)+scale_size(range=c(1,1))+ scale_colour_gradient(low = "lightblue",high = "darkblue")+ 20 | theme(legend.title=element_blank(),axis.text.x = element_text(size = 25), axis.title.x = element_text(size = 25),axis.text.y = element_text(size = 25),axis.title.y = element_text(size = 25)) 21 | 22 | mp2 <- mp+geom_point(aes(x=data$Logititude,y=data$Latitude,color=data$wild_B),size=2)+scale_size(range=c(1,1))+ scale_colour_gradient(low = "lightblue",high = "darkblue")+ 23 | theme(legend.title=element_blank(),axis.text.x = element_text(size = 25), axis.title.x = element_text(size = 25),axis.text.y = element_text(size = 25),axis.title.y = element_text(size = 25)) 24 | mp2 <- 
mp+geom_point(aes(x=data$Logititude,y=data$Latitude,color=data$dome_B),size=2)+scale_size(range=c(1,1))+ scale_colour_gradient(low = "lightblue",high = "darkblue")+ 25 | theme(legend.title=element_blank(),axis.text.x = element_text(size = 25), axis.title.x = element_text(size = 25),axis.text.y = element_text(size = 25),axis.title.y = element_text(size = 25)) 26 | mp2 <- mp+geom_point(aes(x=data$Logititude,y=data$Latitude,color=data$free_B),size=2)+scale_size(range=c(1,1))+ scale_colour_gradient(low = "lightblue",high = "darkblue")+ 27 | theme(legend.title=element_blank(),axis.text.x = element_text(size = 25), axis.title.x = element_text(size = 25),axis.text.y = element_text(size = 25),axis.title.y = element_text(size = 25)) 28 | 29 | mp2 <- mp+geom_point(aes(x=data$Logititude,y=data$Latitude,color=data$Strangulata),size=2)+scale_size(range=c(1,1))+ scale_colour_gradient(low = "lightblue",high = "darkblue")+ 30 | theme(legend.title=element_blank(),axis.text.x = element_text(size = 25), axis.title.x = element_text(size = 25),axis.text.y = element_text(size = 25),axis.title.y = element_text(size = 25)) 31 | 32 | mp3 <- mp2+ guides(fill=guide_legend(title=NULL)) 33 | mp3 34 | -------------------------------------------------------------------------------- /05_Plot/Plot_MapPie.r: -------------------------------------------------------------------------------- 1 | #统一设置:注释内容及颜色 2 | library(pheatmap) 3 | require(reshape) 4 | require (rworldmap) 5 | require(rworldxtra) 6 | library(pophelper) 7 | library(ggplot2) 8 | require(gridExtra) 9 | setwd("/Users/guoyafei/Documents/个人项目/Project-2-Migration/migration/add_ZNdata/Structure_map/EA/") 10 | sfiles <- list.files(path="/Users/guoyafei/Documents/个人项目/Project-2-Migration/migration/add_ZNdata/Structure_map/EA/QMatrix/", full.names=T) 11 | slist <- readQ(files=sfiles) 12 | tabulateQ(qlist=readQ(sfiles)) 13 | summariseQ(tabulateQ(qlist=readQ(sfiles))) 14 | 15 | p1 <- plotQ(slist[1:5],returnplot=T,exportplot=F,quiet=T,basesize=11, 16 | showindlab=F,showyaxis=T,showticks=T,clustercol=brewer.pal(6, "Set2")) 17 | 18 | p1 <- plotQ(slist[1:5],returnplot=T,exportplot=F,quiet=T,basesize=11, 19 | sortind="all",showindlab=T,showyaxis=T,showticks=T,sharedindlab=T,clustercol=brewer.pal(6, "Set2")) 20 | 21 | p1 <- plotQ(slist[1:5],returnplot=T,exportplot=F,quiet=T,basesize=11, 22 | sortind="Cluster1",showindlab=T,showyaxis=T,showticks=T,sharedindlab=T) 23 | 24 | grid.arrange(p1$plot[[1]],p1$plot[[2]],p1$plot[[3]],p1$plot[[4]],p1$plot[[5]],nrow=5) 25 | 26 | grid.arrange(m,p1$plot[[1]],nrow=2) 27 | 28 | 29 | 30 | #聚类 31 | #根据经纬度给样本聚类 32 | data <- read.table("EA_taxa.txt", header=T, sep="\t", stringsAsFactors = F) 33 | library(cluster) 34 | library(factoextra) 35 | dataA <- data[,c(3,4)] 36 | data2 <- dataA[!is.na(dataA$Latitude),] 37 | df = scale(data2) 38 | 39 | #先求样本之间两两相似性 40 | result <- dist(df, method = "euclidean") 41 | #产生层次结构 42 | result_hc <- hclust(d = result, method = "ward.D2") 43 | 44 | data2$type <- cutree(result_hc, k=50) 45 | lat_mean <- tapply(data2[,1],data2$type,mean,na.rm = TRUE) 46 | lon_mean <- tapply(data2[,2],data2$type,mean,na.rm = TRUE) 47 | data2$cluster1 <- NA 48 | data2$cluster2 <- NA 49 | for(i in 1:138) { 50 | for(j in 1:50){ 51 | if(data2[i,3] == j ){ 52 | data2[i,4] <- as.numeric(lat_mean[j]) 53 | data2[i,5] <- as.numeric(lon_mean[j]) 54 | } 55 | } 56 | } 57 | write.table(data2,"BB_data2.txt",sep="\t",row.names = F,quote=F) 58 | 59 | #map 60 | require("RColorBrewer") 61 | 
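# A minimal, self-contained sketch (not from the original workflow) of how per-sample ancestry proportions could be aggregated
# to the geographic cluster centroids computed above, giving the wide Latitude/Longitude-by-ancestry table that mapPies() takes via nameZs.
# The objects toy_q and toy_geo below are invented purely for illustration.
toy_q   <- data.frame(Cluster1 = c(0.9, 0.2, 0.5, 0.1), Cluster2 = c(0.1, 0.8, 0.5, 0.9))  # toy Q matrix, one row per sample
toy_geo <- data.frame(Latitude = c(35, 35, 40, 40), Longitude = c(70, 70, 110, 110))       # toy cluster centroids, one row per sample
toy_pie <- aggregate(toy_q, by = list(Latitude = toy_geo$Latitude, Longitude = toy_geo$Longitude), FUN = mean)
toy_pie  # one row per site: Latitude, Longitude, then mean ancestry per cluster; usable as nameZs = c("Cluster1","Cluster2")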
setwd("/Users/guoyafei/Documents/个人项目/Project-2-Migration/migration/add_ZNdata/Structure_map/EA/") 62 | data <- read.table("EA_taxa.txt",header=T,sep="\t",stringsAsFactors = F) 63 | dataA <- data[,c(1,5,6,16,17)] 64 | colnames(dataA)[1:5] <- c("type","Latitude", "Longitude", "Sub.population.6", "value") 65 | wheat2 = dataA[!is.na(dataA$Latitude),] 66 | wheat = wheat2[!is.na(wheat2$Sub.population.6),] 67 | wheat_reshape <- wheat_reshape <- cast(wheat,Latitude+Longitude~Sub.population.6) 68 | wheat_reshape2 <- as.data.frame(wheat_reshape) 69 | 70 | #8 71 | mapPies(wheat_reshape2,xlim=c(60,140),ylim=c(35,40),nameX="Longitude",nameY="Latitude",nameZs=c('1','2',"3","4","5","6","7","8"),symbolSize=2.5, 72 | zColours=brewer.pal(8, "Set2"),barOrient='vert',oceanCol="#D1EEEE",landCol="#FFDAB9",main="A subgenome") 73 | #6 74 | mapPies(wheat_reshape2,xlim=c(60,140),ylim=c(35,40),nameX="Longitude",nameY="Latitude",nameZs=c('1','2',"3","4","5","6"),symbolSize=2.5, 75 | zColours=brewer.pal(6, "Set2"),barOrient='vert',oceanCol="#D1EEEE",landCol="#FFDAB9",main="A subgenome") 76 | #5 77 | mapPies(wheat_reshape2,xlim=c(60,140),ylim=c(35,40),nameX="Longitude",nameY="Latitude",nameZs=c('1','2',"3","4","5"),symbolSize=2.5, 78 | zColours=brewer.pal(5, "Set2"),barOrient='vert',oceanCol="#D1EEEE",landCol="#FFDAB9",main="A subgenome") 79 | #4 80 | mapPies(wheat_reshape2,xlim=c(60,140),ylim=c(35,36),nameX="Longitude",nameY="Latitude",nameZs=c('1','2',"3","4"),symbolSize=2.5, 81 | zColours=brewer.pal(4, "Set2"),barOrient='vert',oceanCol="#D1EEEE",landCol="#FFDAB9",main="A subgenome") 82 | #3 83 | mapPies(wheat_reshape2,xlim=c(60,140),ylim=c(35,40),nameX="Longitude",nameY="Latitude",nameZs=c('1','2','3'),symbolSize=2.5, 84 | zColours=brewer.pal(3, "Set2"),barOrient='vert',oceanCol="#D1EEEE",landCol="#FFDAB9",main="A subgenome") 85 | #2 86 | mapPies(wheat_reshape2,xlim=c(60,140),ylim=c(35,40),nameX="Longitude",nameY="Latitude",nameZs=c('1','2'), 87 | zColours=brewer.pal(2, "Set2"),symbolSize=2.5,barOrient='vert',oceanCol="#D1EEEE",landCol="#FFDAB9",main="A subgenome") 88 | par(mfcol=c(5,1)) -------------------------------------------------------------------------------- /05_Plot/Plot_Pi_02.r: -------------------------------------------------------------------------------- 1 | # PI.R 2 | # Libraries 3 | library(ggplot2) 4 | library(dplyr) 5 | library(tidyr) 6 | library(forcats) 7 | #library(hrbrthemes) 8 | library(viridis) 9 | 10 | # Load dataset from github 11 | data <- read.table("https://raw.githubusercontent.com/zonination/perceptions/master/probly.csv", header=TRUE, sep=",") 12 | 13 | # Data is at wide format, we need to make it 'tidy' or 'long' 14 | data <- data %>% 15 | gather(key="text", value="value") %>% 16 | mutate(text = gsub("\\.", " ",text)) %>% 17 | mutate(value = round(as.numeric(value),0)) %>% 18 | filter(text %in% c("Almost Certainly","Very Good Chance","We Believe","Likely","About Even", "Little Chance", "Chances Are Slight", "Almost No Chance")) 19 | 20 | # Plot 21 | p <- data %>% 22 | mutate(text = fct_reorder(text, value)) %>% # Reorder data 23 | ggplot( aes(x=text, y=value, fill=text, color=text)) + 24 | geom_violin(width=2.1, size=0.2) + 25 | scale_fill_viridis(discrete=TRUE) + 26 | scale_color_viridis(discrete=TRUE) + 27 | #theme_ipsum() + 28 | theme( 29 | legend.position="none" 30 | ) + 31 | coord_flip() + # This switch X and Y axis and allows to get the horizontal version 32 | xlab("") + 33 | ylab("Assigned Probability (%)") 34 | 35 | p 36 | p 
-------------------------------------------------------------------------------- /06_MarkDown/ASMC.md: -------------------------------------------------------------------------------- 1 | # ASMC 2 | > P. Palamara, J. Terhorst, Y. Song, A. Price. High-throughput inference of pairwise coalescence times identifies signals of selection and enriched disease heritability. Nature Genetics, 2018. 3 | ### Summary (TL;DR) 4 | ASMC is an efficient method for estimating genome-wide `pairwise coalescence times`. It can be run on SNP array or whole-genome sequencing (WGS) data. 5 | ### Input file formats 6 | * HAP file 7 | A text file with no header row and 2N+5 or 2N columns, where N is the number of samples. In the former case, the first five columns are: 8 | 1. Chromosome code 9 | 2. Variant ID 10 | 3. Base-pair coordinate; allele 0 (usually the minor allele, treated as REF when importing with 'ref-first') 11 | 4. Allele 1 (usually the major allele, treated as REF when importing with 'ref-last') 12 | These are followed by a pair of 0/1 haplotype columns for the first sample, then a pair of haplotype columns for the second sample, and so on 13 | * Sample file 14 | The sample information file accompanying a .gen or .bgen genotype dosage file or a .haps phased reference panel. It is loaded with --data/--sample and, in some cases, produced by --export. 15 | 16 | By default, the space-delimited .sample file emitted by --export has two header rows, then one row per sample with 4 or more fields: 17 | 18 | 19 | * map file 20 | 21 | 22 | -------------------------------------------------------------------------------- /06_MarkDown/GATK.md: -------------------------------------------------------------------------------- 1 | # GATK -------------------------------------------------------------------------------- /06_MarkDown/MCMC.md: -------------------------------------------------------------------------------- 1 | ## Markov chain Monte Carlo (MCMC) 2 | 3 | ### In one sentence: sample randomly in probability space to approximate the posterior distribution of a parameter of interest. 4 | 5 | * First, the "parameter of interest" is a number summarizing the phenomenon we care about. We usually estimate parameters with statistics; for example, to describe adult 6 | height, the parameter of interest could be the mean height in inches. A "distribution" is the mathematical representation of every possible value of the parameter and how likely we are to observe each value; its best-known instance is the bell curve: 7 | * ![98873image -21-](assets/98873image%20-21-.png) 8 | -------------------------------------------------------------------------------- /06_MarkDown/bayenv_method.md: -------------------------------------------------------------------------------- 1 | ### What does an environment-associated locus look like in the genome? 2 | * Such loci may show unusually strong correlations between allele frequency and key ecological variables, or large allele-frequency differences among geographic environments; these signatures allow us to identify candidate loci involved in local adaptation. 3 | ### What difficulties arise when trying to identify environment-associated loci genome-wide? 4 | * Differences in sample size, and the shared divergence history and gene flow among populations, can produce a degree of correlation in allele frequencies across populations (neutral correlation) that confounds the identification of adaptive loci. 5 | ### How is this overcome? 6 | * The authors developed a Bayesian method that estimates the empirical pattern of among-population allele-frequency covariance from a set of markers and then uses it as a null model when testing individual SNPs. 7 | ### What exactly does the method do? 
8 | * In the authors' model, the allele frequency in each population is drawn from a set of underlying population frequencies, and a transform of these population frequencies is assumed to follow a multivariate normal distribution. 9 | * The authors first estimate the covariance matrix of this multivariate normal distribution with Markov chain Monte Carlo. At each SNP they then compute a measure of support - a Bayes factor - for a model in which an environmental variable has a linear effect on the transformed allele frequencies, relative to the model given by the covariance matrix alone. This test outperforms existing correlation tests. 10 | * The authors also show that the method can identify SNPs with unusually large allele-frequency differentiation, providing a powerful alternative to pairwise or global FST-based approaches. 11 | * Software: http://www.eve.ucdavis.edu/gmcoop/ 12 | ![](assets/16442473466436.jpg) 13 | ### Problem statement 14 | * As shown in Figure 1, populations across a broad region are clustered with respect to both allele frequency and distance from the equator (latitude). The correlation between allele frequency and the environmental variable is therefore supported by far fewer independent observations than the 52 points in Figure 1. It is also unclear whether the allele-frequency variation in Figure 1 is due to sampling error from small sample sizes or to genetic drift. 15 | * The authors' model therefore accounts for differences in sample size and for the null correlation of allele frequencies when testing for correlations between environmental variables and allele frequencies. 16 | * To do this, a set of control loci is used to build a null model of how allele frequencies covary across populations. We can then test whether the correlation between allele frequency and an environmental variable at a locus of interest is larger than expected under this null model. The discussion focuses on codominant, biallelic markers such as SNPs, but the model can be extended to other marker types. The method can be applied to continuous or discrete environmental variables. 17 | ### Method 18 | Null model: 19 | Assume L independent SNPs in K populations. If L is very large, a large random subset of L can be used to estimate the null model to speed up computation. Environment-associated markers may be present among the L loci, but this small fraction of sites is expected to have little effect on the parameter estimates of the null model; L also serves as a good set of control markers. 20 | 21 | To overcome the constrained nature of allele frequencies 22 | To overcome the constrained nature of allele frequencies: 23 | Following Nicholson et al., subpopulation allele frequencies are assumed to be roughly normally distributed around an ancestral allele frequency el (02) print $2,$2,$3,$4;else print $0}' Alineage001_Free_threshing_phased_imputed.sample 4 | 5 | -------------------------------------------------------------------------------- /07_VMap3/05_TasselGWAS.sh: -------------------------------------------------------------------------------- 1 | #Data filtering and format conversion 2 | 3 | #Required software: plink, etc. 4 | 1. Filter by MAF>0.05 and missing rate<0.1 5 | plink --vcf test.imputed.vcf --maf 0.05 --geno 0.1 --recode vcf-iid --out test.filter --allow-extra-chr --autosome-num 42 6 | #(use these flags for non-numeric chromosome names such as ChrUn/Sy; converting chromosome names to numbers is recommended, and marker IDs in the VCF need to be assigned) 7 | 2. LD pruning of markers 8 | plink --vcf test.filter.vcf --indep-pairwise 50 10 0.2 --out test.filterLD --allow-extra-chr --autosome-num 42 (the .in file lists the retained marker IDs) 9 | 3. Extract the pruned markers 10 | plink --vcf test.filterLD.vcf --make-bed --extract test.filter.in --out test.filter.prune.in 11 | 4. Convert to structure/admixture format 12 | plink --bfile test.filter.prune.in --recode structure --out test.filter.prune.in #produces .recode.strct_in, the structure input format 13 | plink --bfile test.filter.prune.in --recode 12 --out test.filter.prune.in #produces .ped, the admixture input format 14 | 15 | #Population structure 16 | #Required software: structure/admixture, etc. 17 | #admixture is used here for simplicity, with k ranging from 1 to 13 18 | admixture --cv test.filter.ped 1 >>log.txt 19 | admixture --cv test.filter.ped 2 >>log.txt 20 | ....... 21 | admixture --cv test.filter.ped 13 >>log.txt 22 | wait 23 | grep "CV error" log.txt >k_1to13 24 | 25 | #Take the k with the smallest CV error (k=10); the test.filter.prune.in.10.Q output is used as input for the association analysis (drop the last column, add a header and sample IDs) 26 | 27 | #Kinship/PCA 28 | #Required software: tassel, etc. (PCA can be done with R packages; since population structure was already estimated, PCA was skipped here) 29 | run_pipeline.pl -importGuess test_impute.vcf -KinshipPlugin -method Centered_IBS -endPlugin -export test_kinship.txt -exportType SqrMatrix 30 | 31 | #Association analysis 32 | #Required software: tassel/GAPIT/FaSt-LMM, etc. (GAPIT is powerful but needs many R packages and makes plots automatically; FaSt-LMM is worth trying later) 33 | #The input file formats need some simple manual editing (see figure) 34 | 1. Convert VCF to hapmap format 35 | run_pipeline.pl -fork1 -vcf test.imputed.vcf -export test -exportType Hapmap -runfork1 36 | 2. Sort SNP positions 37 | run_pipeline.pl -SortGenotypeFilePlugin -inputFile test.hmp.txt -outputFile test_sort -fileType Hapmap 38 | #produces test.hmp.txt 39 | 3. 
GLM模型 40 | run_pipeline.pl -fork1 -h test_sort.hmp.txt -fork2 -r test.trait.txt -fork3 -q test.best_k10.txt -excludeLastTrait -combine4 -input1 -input2 -input3 -intersect -glm -export test_glm -runfork1 -runfork2 -runfork3 41 | 4.MLM模型 42 | run_pipeline.pl -fork1 -h test_sort.hmp.txt -fork2 -r test.trait.txt -fork3 -q test.best_k10.txt -excludeLastTrait -fork4 -k test_kinship.txt -combine5 -input1 -input2 -input3 -intersect -combine6 -input5 -input4 -mlm -mlmVarCompEst P3D -mlmCompressionLevel None -export test_mlm -runfork1 -runfork2 -runfork3 43 | -------------------------------------------------------------------------------- /07_VMap3/06_Selscan.r: -------------------------------------------------------------------------------- 1 | #selscan 2 | #参考资料 3 | #hapFLK对具有对瓶颈效应或迁移的群体有着更强大的检测选择信号的能力。 4 | #H12对于部分扫荡选择性清除(soft selective sweep)和全扫荡选择性清除(hard selective sweep)都有比较好的检测效率。 5 | #LASSI-Plus: A program written in C++ that implements the likelihood ratio Λ statistic of DeGiorgio and Szpiech (2021) for detecting selective sweeps and inferring their softness using the spatial distribution of distortions in the haplotype frequency spectrum. 6 | #SweepFinder2: A program written in C that can perform genomic scans for recent selective sweeps selection while controlling for background selection and mutation rate variation (DeGiorgio et al. 2016). 7 | #ihs 8 | selscan --ihs --threads 30 --gap-scale 200000 --max-gap 2000000 --maf 0.01 --vcf chr001.phased.imputed.pos.vcf.gz --map chr001.map4 --out chr001 --keep-low-freq 9 | #selscan --ihs --threads 30 --gap-scale 200000 --max-gap 2000000 --maf 0.01 --vcf chr001.phased.imputed.pos.maf005.vcf.gz --map chr001.maf005.map4 --out chr001.maf005 --keep-low-freq 10 | 11 | data <- read.table("/Users/guoyafei/公共资源/01_数据资源/iwgsc_refseqv1.0_mapping_data_42chr.txt",header=T,stringsAsFactors = F) 12 | don <- data %>% 13 | # Compute chromosome size 14 | group_by(chromosome) %>% 15 | lines(lowess(data$physicalPosition,data$geneticPosition)) %>% 16 | # Calculate cumulative position of each chromosome 17 | mutate(tot=cumsum(as.numeric(chr_len))-chr_len) %>% 18 | select(-chr_len) %>% 19 | # Add this info to the initial dataset 20 | left_join(gwasResults, ., by=c("CHR"="CHR")) %>% 21 | # Add a cumulative position of each SNP 22 | arrange(CHR, BP) %>% 23 | mutate( BPcum=BP+tot) 24 | 25 | -------------------------------------------------------------------------------- /07_VMap3/07_scam.r: -------------------------------------------------------------------------------- 1 | require(scam) 2 | testx=data1$physicalPosition 3 | length(testx) 4 | testy=data1$geneticPosition 5 | length(testy) 6 | fit = scam(testy~s(testx,k=30,bs="cr"), family=gaussian(link="identity")) 7 | predict(fit,data.frame(testx)) 8 | #plot(testx,testy) 9 | #lines(testx,predict(fit),col="red") 10 | 11 | plot(testx,predict(fit,data.frame(testx))) 12 | #head(data.frame(testx)) 13 | #z<-diff(predict(fit,data.frame(testx))) 14 | #sort(z) 15 | #zz<-data.frame(testx,testy,predict(fit,data.frame(testx))) 16 | #summary(zz) 17 | #zz%>% 18 | # gather(key="group",value="y",c(2,3)) %>% 19 | # ggplot(aes(x=testx,y=y,color=group))+geom_point(size=0.1) 20 | #zz%>%ggplot(aes(x=predict.fit..data.frame.testx..,y=testy))+geom_point() 21 | #test<-c(1157133,2410604) 22 | #approx(df1$physicalPosition,df1$geneticPosition,xout = test,method = "linear",rule = 1,ties = "ordered")$y 23 | #head(df1$physicalPosition) 24 | #zlist<- unique(df$chromosome) 25 | #for(p in zlist){ 26 | # print(p) 27 | #} 28 | #head(df) 29 | require(scam) 
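# A self-contained toy example (not part of the original analysis) of the idea used below: fit a monotone spline of
# genetic position on physical position with scam (loaded on the previous line), then predict genetic positions for
# arbitrary physical coordinates. All values here are simulated.
set.seed(42)
toy_phys <- sort(runif(200, 1, 7e8))
toy_gen  <- cumsum(runif(200, 0, 0.05))   # non-decreasing, like a genetic map in cM
toy_fit  <- scam(toy_gen ~ s(toy_phys, k = 20, bs = "mpi"), family = gaussian(link = "identity"))
predict(toy_fit, data.frame(toy_phys = c(1e8, 3e8, 5e8)))   # interpolated genetic positions
# A quick piecewise-linear cross-check with base R approx() gives similar interpolation:
approx(toy_phys, toy_gen, xout = c(1e8, 3e8, 5e8), method = "linear", ties = "ordered")$y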
30 | setwd("/Users/guoyafei/Documents/02_VmapIII/06_Selection/selscan") 31 | map <- read.table("/Users/guoyafei/公共资源/01_数据资源/iwgsc_refseqv1.0_mapping_data_42chr.txt",header=T,stringsAsFactors = F) 32 | filenames <- paste(c(paste("chr00",1:9,sep=""),paste("chr0",10:42,sep="")),".pos.txt",sep="") 33 | for(i in c(6)){ 34 | sub <- map[which(map$chromosome == i),] 35 | pos <- read.table(filenames[i],header=F,stringsAsFactors = F) 36 | colnames(pos) <- c("chr","physicalPosition") 37 | physicalPosition=sub$physicalPosition 38 | geneticPosition=sub$geneticPosition 39 | fit = scam(geneticPosition~s(physicalPosition,k=20,bs="mpi"), family=gaussian(link="identity")) 40 | #plot(physicalPosition,predict(fit,data.frame(geneticPosition))) 41 | ID <- paste(pos$chr,pos$physicalPosition,sep="-") 42 | physicalPosition <- pos$physicalPosition 43 | out <- data.frame(pos$chr,ID,predict(fit,data.frame(physicalPosition)),pos$physicalPosition) 44 | outfile <- paste("/Users/guoyafei/Documents/02_VmapIII/06_Selection/selscan/map/",filenames[i],sep="") 45 | write.table(out,outfile,col.names = F,quote = F,sep="\t", row.names = F) 46 | } 47 | 48 | #chr036.ihs.out 49 | data <- read.table("/Users/guoyafei/Desktop/chr036.ihs.out",header=F,stringsAsFactors = F) 50 | colnames(data)<-c("locus","pos","1_freq","ihh_1","ihh_0","ihs") 51 | data2 <- data[sort(sample(c(1:52691),10000)),] 52 | p<- ggplot(data2, aes(x=pos/1000000, y=ihs)) + 53 | # Show all points 54 | geom_point(alpha=0.8, size=1.3) + 55 | #scale_color_manual(values = rep(c("grey","grey", "skyblue", "skyblue"), 7)) + 56 | # custom X axis: 57 | #scale_x_continuous( label = axisdf$CHR, breaks= axisdf$center ) + 58 | #scale_y_continuous(expand = c(0, 0) ) + # remove space between plot area and x axis 59 | # Add highlighted points 60 | #geom_point(data=subset(don, is_highlight=="yes"), color="orange", size=2) + 61 | # Add label using ggrepel to avoid overlapping 62 | #geom_label_repel( data=subset(don, is_annotate=="yes"), aes(label=SNP), size=2) + 63 | # Custom the theme: 64 | theme_bw() + 65 | theme( 66 | legend.position="none", 67 | panel.border = element_blank(), 68 | panel.grid.major.x = element_blank(), 69 | panel.grid.minor.x = element_blank(), 70 | axis.text.x=element_text(size=15), 71 | axis.text.y=element_text(size=15), 72 | axis.title.y=element_text(size = 15), 73 | axis.title.x=element_text(size = 15), 74 | ) 75 | #scale_y_continuous(limits = c(0,7))+ 76 | #geom_point(data=point,aes(x=BPcum,y=-log10(P)),color="red") 77 | #geom_vline(xintercept = 528377322, colour="red",linetype=2, size=1) 78 | #geom_hline(yintercept = -log10(thresh[a,1]), colour="red",linetype=2, size=1)+ 79 | #geom_hline(yintercept = -log10(thresh[a,2]), colour="blue",linetype=2, size=1) 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /07_VMap3/08_Raxml-ng.r: -------------------------------------------------------------------------------- 1 | #Raxml-ng 2 | #yafei@204:/data2/yafei/004_Vmap3/Tree 3 | #run_pipeline.pl -Xms512m -Xmx5g -vcf lineageD.vcf.gz -export lineageD -exportType Phylip_Inter 4 | #raxml-ng --all --msa lineageAB.phy --seed 12346 --model GTR+G --bs-trees 100 --threads 80 5 | 6 | setwd("/Users/guoyafei/Desktop/") 7 | data <- read.table("/Users/guoyafei/Desktop/207_data.txt",header=F,stringsAsFactors = F) 8 | data <- read.table("/Users/guoyafei/Desktop/source_data_107.txt",header=F,stringsAsFactors = F) 9 | library(ggplot2) 10 | 11 | ggplot(data, aes(x = V1)) + 12 | 
geom_histogram(binwidth = 5, fill = "lightblue", colour = "black")+ 13 | xlim(1899,2010)+ 14 | theme_classic()+ 15 | theme(axis.text.x = element_text(size = 12), axis.title.x = element_text(size = 15),axis.text.y = element_text(size = 12),axis.title.y = element_text(size = 15))+ 16 | ylab("Count")+ 17 | xlab("Source Date (USDA)") 18 | 19 | 20 | -------------------------------------------------------------------------------- /07_VMap3/09_WheatGo.r: -------------------------------------------------------------------------------- 1 | #wheatGo 2 | library(ggplot2) 3 | library(RColorBrewer) 4 | display.brewer.all() 5 | col <- brewer.pal(n = 8, name = "Blues")[c(4,7)] 6 | col <- brewer.pal(n = 8, name = "Oranges")[c(4,7)] 7 | col <- brewer.pal(n = 8, name = "Greens")[c(4,7)] 8 | col <- brewer.pal(n = 8, name = "Red")[c(4,7)] 9 | 10 | setwd("/Users/guoyafei/Documents/02_VmapIII/08_Network/shufgene") 11 | setwd("/Users/guoyafei/Documents/02_VmapIII/08_Network/PPI") 12 | data <- read.table("go.txt",header=T,stringsAsFactors = F,sep="\t") 13 | colnames(data)[c(3,4,6)] <- c("Description", "Count", "p.adjust") 14 | ggplot(data=data)+ 15 | geom_bar(aes(x= Description, y=Count, fill=(-log10(p.adjust))), stat='identity') + 16 | coord_flip() + 17 | scale_fill_gradient(expression(-log(p.adjust)),low="#FDAE6B", high = "#D94801") + 18 | ylab("Gene number") + 19 | xlab("GO term description") + 20 | theme( 21 | axis.text.x=element_text(color="black",size=rel(0.3)), 22 | axis.text.y=element_text(color="black", size=rel(0.3)), 23 | axis.title.x = element_text(color="black", size=rel(5)), 24 | axis.title.y = element_blank(), 25 | legend.text=element_text(color="black",size=rel(0.2)), 26 | legend.title = element_text(color="black",size=rel(0.7)) 27 | )+ 28 | ylim(0,130)+ 29 | theme_bw()+ 30 | theme(panel.grid=element_blank(),panel.border=element_blank(),axis.line=element_line(size=1,colour="black")) 31 | 32 | 33 | #画整体的Go富集图---- 34 | path <- "/Users/guoyafei/Documents/02_VmapIII/08_Network/shufgene/Go" 35 | fileNames <- dir(path) 36 | filePath <- sapply(fileNames, function(x){ 37 | paste(path,x,sep='/')}) 38 | data <- lapply(filePath, function(x){ 39 | read.table(x, header=F, sep = "\t", stringsAsFactors=F)}) 40 | plot_list = list() 41 | for(i in 1:length(data)){ 42 | colnames(data[[i]])[c(3,4,6)] <- c("Description", "Count", "p.adjust") 43 | p <- ggplot(data=data[[i]])+ 44 | geom_bar(aes(x= Description, y=Count, fill=(-log(p.adjust))), stat='identity') + 45 | coord_flip() + 46 | #facet_grid(.~Name,scales="free") + 47 | #facet_wrap(~Name,ncol = 1)+ 48 | #A 49 | scale_fill_gradient(expression(-log(p.adjust)),low="#FDAE6B", high = "#D94801") + 50 | #B 51 | #scale_fill_gradient(expression(p.adjust),low="#9ECAE1", high = "#2171B5") + 52 | #D 53 | #scale_fill_gradient(expression(p.adjust),low="#A1D99B", high = "#238B45") + 54 | ylab("Gene number") + 55 | xlab("GO term description") + 56 | #expand_limits(y=c(0,8))+ 57 | #ylim(0,100)+ 58 | theme( 59 | axis.text.x=element_text(color="black",size=rel(0.3)), 60 | axis.text.y=element_text(color="black", size=rel(0.3)), 61 | axis.title.x = element_text(color="black", size=rel(5)), 62 | axis.title.y = element_blank(), 63 | legend.text=element_text(color="black",size=rel(0.2)), 64 | legend.title = element_text(color="black",size=rel(0.7)) 65 | )+ 66 | theme_bw()+ 67 | theme(panel.grid=element_blank(),panel.border=element_blank(),axis.line=element_line(size=1,colour="black")) 68 | 69 | plot_list[[i]] = p 70 | } 71 | 72 | name <- strsplit(names(data), ".txt") 73 | for (i in 
1:length(data)) { 74 | file_name = paste(name[[i]],".pdf",sep="") 75 | pdf(file_name,height = 12,width = 9) 76 | print(plot_list[[i]]) 77 | dev.off() 78 | } 79 | -------------------------------------------------------------------------------- /07_VMap3/10_SDS.r: -------------------------------------------------------------------------------- 1 | #SDS 2 | #66:/data1/home/yafei/004_Vmap3/SDS-master 3 | library(qqman) 4 | library(tidyverse) 5 | require(gridExtra) 6 | setwd("/Users/guoyafei/Documents/02_VmapIII/06_Selection/SDS") 7 | chr1 <- read.table("test.txt", header=T, stringsAsFactors = F) 8 | 9 | #manhattan 10 | gwasResults2 <- chr1 11 | thresh <- gwasResults2[order(gwasResults2$P),][nrow(gwasResults)*0.05,4] 12 | #gwasResults2$CHR = 1 13 | colnames(gwasResults2) <- c("CHR","BP","SNP","P") 14 | gwasResults <- gwasResults2 15 | don <- gwasResults %>% 16 | # Compute chromosome size 17 | group_by(CHR) %>% 18 | summarise(chr_len=max(BP)) %>% 19 | # Calculate cumulative position of each chromosome 20 | mutate(tot=cumsum(as.numeric(chr_len))-chr_len) %>% 21 | select(-chr_len) %>% 22 | # Add this info to the initial dataset 23 | left_join(gwasResults, ., by=c("CHR"="CHR")) %>% 24 | # Add a cumulative position of each SNP 25 | arrange(CHR, BP) %>% 26 | mutate( BPcum=BP+tot) 27 | axisdf <- don %>% group_by(CHR) %>% summarize(center=( max(BPcum) + min(BPcum) )/ 2) 28 | p <- ggplot(don, aes(x=BPcum, y=-log(P))) + 29 | geom_point( aes(color=as.factor(CHR)), alpha=0.8, size=1.3) + 30 | scale_color_manual(values = rep(c("grey","grey", "skyblue", "skyblue"), 7)) + 31 | scale_x_continuous( label = axisdf$CHR, breaks= axisdf$center ) + 32 | scale_y_continuous(expand = c(0, 0) ) + # remove space between plot area and x axis 33 | geom_hline(yintercept = -log(thresh), color = 'red', size = 0.5) + 34 | scale_y_continuous(limits = c(0, 100))+ 35 | theme_bw() + 36 | theme( 37 | legend.position="none", 38 | panel.border = element_blank(), 39 | panel.grid.major.x = element_blank(), 40 | panel.grid.minor.x = element_blank(), 41 | axis.text.x=element_text(size=15), 42 | axis.text.y=element_text(size=15), 43 | axis.title.y=element_text(size = 15), 44 | axis.title.x=element_text(size = 15), 45 | ) 46 | 47 | pdf("Landrace.pdf",height = 9,width = 9) 48 | grid.arrange(p[[1]],p[[2]],p[[3]],nrow=3) 49 | dev.off() 50 | pdf("Cultivar.pdf",height = 9,width = 9) 51 | grid.arrange(p[[4]],p[[5]],p[[6]],nrow=3) 52 | dev.off() 53 | 54 | 55 | #qq 56 | 57 | qq_dat <- data.frame(obs=-log10(sort(data$P,decreasing=FALSE)), 58 | exp=-log10( ppoints(length(data$P)))) 59 | ggplot(data=qq_dat,aes(exp,obs))+ 60 | geom_point(alpha=0.7,color="#7F7F7FFF")+ 61 | geom_abline(color="#D62728FF")+ 62 | xlab("Expected -log10(P-value)")+ 63 | ylab("Observed -log10(P-value)")+ 64 | scale_x_continuous(limits = c(0,7))+ 65 | scale_y_continuous(limits = c(0,7))+ 66 | #ggtitle(name)+ 67 | theme( 68 | plot.title = element_text(color="red", size=20, face="bold.italic"), 69 | axis.title = element_text(size=12,face="bold"), 70 | axis.text = element_text(face="bold",size=8,color = "black"), 71 | #axis.line = element_line(size=0.8,color="black"), 72 | axis.ticks= element_line(size=0.8,colour = "black"), 73 | panel.grid =element_blank(), 74 | panel.border = element_rect(fill=NA,size = 0.8), 75 | panel.background = element_blank()) 76 | pdf(name1,width = 6,height = 3) 77 | grid.arrange(p[[1]],p[[2]],nrow=1) 78 | dev.off() 79 | 80 | -------------------------------------------------------------------------------- /07_VMap3/15_Rareallele.R: 
-------------------------------------------------------------------------------- 1 | #Rare Allele 2 | setwd("/Users/guoyafei/Documents/02_VmapIII/10_RareAllele") 3 | data <- read.table("sum2.txt", header=T, stringsAsFactors = F) 4 | 5 | library(reshape2) 6 | md <- melt(data, id="id") 7 | ggplot(md, aes(x=id, y=value, color=id)) + 8 | geom_bar(stat = 'identity') + 9 | facet_grid(variable~.)+ 10 | #scale_color_manual(values = c("#66C2A5","#FC8D62","#8DA0CB","#E78AC3","#FFD92F")) + 11 | #geom_jitter(shape=16, position=position_jitter(0.2)) + 12 | xlab("")+ylab("")+theme_bw()+ 13 | theme(plot.title = element_text(color="red", size=10, face="bold.italic"),legend.position = "null",axis.text.x = element_text(size = 10), axis.title.x = element_text(size = 10),axis.text.y = element_text(size = 10),axis.title.y = element_text(size = 10)) 14 | -------------------------------------------------------------------------------- /07_VMap3/17_N-content.R: -------------------------------------------------------------------------------- 1 | setwd("/Users/guoyafei/Documents/02_Vmap3/before0219/02_DataSource/05_WEGA") 2 | library(ncdf4) # package for netcdf manipulation 3 | library(raster) # package for raster manipulation 4 | library(rgdal) # package for geospatial analysis 5 | library(ggplot2) 6 | library(ggmap) 7 | library(RColorBrewer) 8 | 9 | library(rasterVis) 10 | 11 | data <- read.table("369sample.txt",header=T, stringsAsFactors = F,sep="\t") 12 | 13 | N <- nc_open("/Users/guoyafei/公共资源/01_数据资源/TN5min.nc", write=FALSE, readunlim=TRUE, verbose=FALSE, 14 | auto_GMT=TRUE, suppress_dimvals=FALSE, return_on_error=FALSE ) 15 | lon <- ncvar_get(N, "lon") 16 | lat <- ncvar_get(N,"lat",verbose=F) 17 | t <- ncvar_get(N,"time") 18 | ndvi.array <- ncvar_get(N,"TN") 19 | dim(ndvi.array) 20 | filevalue <- ncatt_get(N,"TN") 21 | ndvi.array[ndvi.array==filevalue$value] <- NA 22 | ndvi.slice <- ndvi.array[,,1] 23 | dim(ndvi.slice) 24 | r <- raster(t(ndvi.slice), xmn=min(lon), xmx=max(lon), ymn=min(lat), ymx=max(lat), crs=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs+ towgs84=0,0,0")) 25 | r <- raster(t(ndvi.slice), xmn=min(lon), xmx=max(lon), ymn=min(lat), ymx=max(lat)) 26 | 27 | plot(r) 28 | 29 | points(landrace$Longitude,landrace$Latitude,pch = 0, bg = "grey",cex=0.5) 30 | points(cultivar$Longitude,cultivar$Latitude,pch = 8, bg = "grey",cex=0.5) 31 | 32 | sample <- data[,c(9,12,11)] 33 | sample <- sample[which(sample$Longitude != "NA"),] 34 | landrace <- sample[which(sample$type.USDA_8. == "Landrace"),] 35 | cultivar <- sample[which(sample$type.USDA_8. 
== "Cultivar"),] 36 | 37 | 38 | 39 | 40 | sample <- data[,c(12,11)] 41 | sample$NC <- extract(r, sample) 42 | sample$source <- data$DataSource 43 | sample <- sample[which(sample$NC != "NA"),] 44 | #绘图 45 | mp<-NULL 46 | mapworld<-borders("world",colour = "gray70",fill="gray70") 47 | mp<-ggplot()+mapworld+ylim(-60,90) +theme_classic() 48 | color <- brewer.pal(8, "Dark2")[c(1,2,3,4,6)] 49 | #AABBDD---- 50 | mp+geom_point(aes(x=sample$Longitude, y=sample$Latitude, color=sample$NC,shape = sample$source),size=1,alpha=0.7)+ 51 | scale_size(range=c(1,1))+ 52 | #scale_color_manual(values = color) + 53 | scale_colour_gradient2(low = "dark red", mid = "white", high = "dark blue", midpoint = 15)+ 54 | theme_classic()+ 55 | theme(axis.text.x = element_text(size = 20),axis.title.x = element_text(size = 20),axis.text.y = element_text(size = 20),axis.title.y = element_text(size = 20))+ 56 | #scale_fill_manual(values=c("#97FFFF", "#FFD700", "#FF6347", "#8470FF","#D8BFD8"), 57 | # breaks=c("AA", "SS", "DD", "AABB","AABBDD"), 58 | # labels=c("AA", "SS", "DD", "AABB","AABBDD"))+ 59 | theme(axis.text = element_blank()) + ## 删去所有刻度标签 60 | theme(axis.ticks = element_blank()) 61 | #theme(panel.grid =element_blank()) + 62 | #theme(axis.ticks.y = element_blank()) 63 | 64 | 65 | 66 | 67 | colr <- colorRampPalette(brewer.pal(11, 'RdYlBu')[3:9]) 68 | levelplot(r, 69 | margin=FALSE,# suppress marginal graphics 70 | ylim=c(-50,80), 71 | colorkey=list( 72 | space='bottom', # plot legend at bottom 73 | labels=list(at=-5:5, font=4) # legend ticks and labels 74 | ), 75 | par.settings=list( 76 | axis.line=list(col='transparent') # suppress axes and legend outline 77 | ), 78 | scales=list(draw=FALSE), # suppress axis labels 79 | col.regions=colr, # colour ramp 80 | at=seq(-10, 50)) 81 | #layer(sp.polygons(oregon, lwd=3)) # add oregon SPDF with latticeExtra::layer 82 | 83 | 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /07_VMap3/19_lm.R: -------------------------------------------------------------------------------- 1 | library(ggplot2) 2 | a <- read.table("/Users/guoyafei/Desktop/test.txt",header=T,stringsAsFactors = F,sep="\t") 3 | 4 | p <- ggplot(data = a, aes(x = Date, y = Proportion)) + 5 | geom_smooth(method = "lm", 6 | color="#558ebd", 7 | fill="lightgray", 8 | alpha=.7, 9 | size=0.8,se=T, 10 | formula = y~x) + 11 | geom_point(size=4, 12 | alpha=0.7, 13 | color="#fe654c")+ xlim(-10000,-6000)+ ylim(0,1)+ 14 | theme_bw() 15 | p 16 | 17 | lm_eqn <- function(a){ 18 | m <- lm(Proportion ~ Date, a); 19 | eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2, 20 | list(a = format(unname(coef(m)[1]), digits = 5), 21 | b = format(unname(coef(m)[2]), digits = 2), 22 | r2 = format(summary(m)$r.squared, digits = 3))) 23 | as.character(as.expression(eq)); 24 | } 25 | 26 | p1 <- p + 27 | geom_text(x = -9000, y = 0.75, 28 | label = lm_eqn(a), parse = TRUE) 29 | p1 30 | -------------------------------------------------------------------------------- /07_VMap3/22_recombinationRate.R: -------------------------------------------------------------------------------- 1 | #recombination rate 2 | library(ggplot2) 3 | setwd("/Users/guoyafei/公共资源/01_数据资源/1.0_recombination_rate") 4 | iwgsc1 <- read.table("iwgsc_refseqv1.0_mapping_data.txt", header=T,stringsAsFactors = F) 5 | ggplot(iwgsc1, aes(x=physicalPosition, y=geneticPosition,group=chromosome)) + 6 | geom_point(size=0.5)+ 7 | facet_wrap(vars(chromosome), nrow = 7,scales = "free")+ 8 | geom_line()+ 9 | theme_bw() 10 
| 11 | iwgsc2 <- read.table("iwgsc_refseqv1.0_recombination_rate.txt", header=T,stringsAsFactors = F) 12 | ggplot(iwgsc2, aes(x=intervalStart, y=nbOfSnps,group=chromosome)) + 13 | geom_point(size=0.3)+ 14 | facet_wrap(vars(chromosome), nrow = 7,scales = "free")+ 15 | geom_line()+ 16 | theme_bw() 17 | 18 | yafei <- read.table("RecombinationRate_1M_yafei.txt", header=T,stringsAsFactors = F) 19 | ggplot(yafei, aes(x=intervalStart, y=recombinationRate,group=chromosome)) + 20 | geom_point(size=0.3)+ 21 | facet_wrap(vars(chromosome), nrow = 7,scales = "free")+ 22 | geom_line()+ 23 | theme_bw() 24 | 25 | lipeng <- read.table("recombination_rate_geva_lipeng.txt", header=T,stringsAsFactors = F) 26 | ggplot(lipeng, aes(x=Position.bp., y=Map.cM.,group=Chromosome)) + 27 | geom_point(size=0.3)+ 28 | facet_wrap(vars(Chromosome), nrow = 7,scales = "free")+ 29 | geom_line()+ 30 | theme_bw() 31 | 32 | xue <- read.table("slidewindow_recomrate_updown_20M_xpclr_xuebo.txt", header=F,stringsAsFactors = F) 33 | colnames(xue) <- c("chromosome","intervalStart","intervalEnd","nbOfSnps","recombinationRate") 34 | ggplot(xue, aes(x=intervalStart, y=recombinationRate,group=chromosome)) + 35 | geom_point(size=0.3)+ 36 | facet_wrap(vars(chromosome), nrow = 7,scales = "free")+ 37 | geom_line()+ 38 | theme_bw() 39 | 40 | xue <- read.table("RecombinationRate_1M_xuebo.txt", header=F,stringsAsFactors = F) 41 | colnames(xue) <- c("chromosome","intervalStart","intervalEnd","nbOfSnps","recombinationRate") 42 | ggplot(xue, aes(x=intervalStart, y=nbOfSnps,group=chromosome)) + 43 | geom_point(size=0.3)+ 44 | facet_wrap(vars(chromosome), nrow = 7,scales = "free")+ 45 | geom_line()+ 46 | theme_bw() 47 | 48 | ggplot(D, mapping = aes(x = V4)) + 49 | geom_density( alpha = 0.5, color = "#FF7F00",fill="#FF7F00") + 50 | theme_bw() 51 | p <- ggplot(don, aes(x=BPcum, y=P)) + 52 | geom_point( aes(color=as.factor(CHR)), alpha=0.8, size=1) + 53 | scale_color_manual(values = rep(c("grey","grey", "skyblue", "skyblue"), 7)) + 54 | scale_x_continuous( label = axisdf$CHR, breaks= axisdf$center ) + 55 | scale_y_continuous(expand = c(0, 0) ) + 56 | theme_bw() + 57 | ylab("CLR")+ 58 | theme( 59 | legend.position="none", 60 | panel.border = element_blank(), 61 | panel.grid.major.x = element_blank(), 62 | panel.grid.minor.x = element_blank(), 63 | axis.text.x=element_text(size=12), 64 | axis.text.y=element_text(size=12), 65 | axis.title.y=element_text(size = 12), 66 | axis.title.x=element_text(size = 12), 67 | )+ 68 | geom_hline(yintercept=thresh1,color='#E69F00',linetype = "dashed") 69 | ggplot(all, aes(x=V1, y=V2, color=type)) + 70 | geom_bar(stat="identity",width=0.5,position='dodge') + 71 | xlab("")+ 72 | ylab("")+ 73 | theme_bw() 74 | 75 | -------------------------------------------------------------------------------- /07_VMap3/29_correlation.R: -------------------------------------------------------------------------------- 1 | library(ggplot2) 2 | genome <- read.table("/Users/guoyafei/公共资源/01_数据资源/chrConvertionRule.txt", header=T,stringsAsFactors = F) 3 | ge <- genome[c(2:43),c(1,3)] 4 | 5 | sum <- read.table("/Users/guoyafei/Desktop/Va.txt", header=F,stringsAsFactors = F) 6 | mean <- read.table("/Users/guoyafei/Desktop/mean2.txt", header=F,stringsAsFactors = F) 7 | snpnum <- read.table("/Users/guoyafei/Desktop/snpnum.txt", header=F,stringsAsFactors = F) 8 | 9 | all <- cbind(ge,sum)[,c(2,4)] 10 | 11 | colnames(all) <- c("ID","length","sum","mean","snpnum") 12 | all$V3 <- all$V2/10000000 13 | 14 | a <- summary(lm(as.numeric(sum[,2])~ 
as.numeric(snpnum[,2]))) 15 | a <- summary(lm(as.numeric(ge[,2])~ as.numeric(sum[,2]))) 16 | a <- summary(lm(as.numeric(ge[,2])~ as.numeric(snpnum[,2]))) 17 | 18 | x <- append(x,a$adj.r.squared) 19 | 20 | ggplot(data=all, aes(x=EndIndex.exclusive., y=V3))+ 21 | geom_point(size=3,aes(x=EndIndex.exclusive., y=V3))+ 22 | stat_smooth(method="lm")+ 23 | xlab("length")+ 24 | ylab("variance explained")+ 25 | theme_classic() -------------------------------------------------------------------------------- /07_VMap3/34_adapgene.R: -------------------------------------------------------------------------------- 1 | #adaptative genes 2 | setwd("~/Desktop/adapgene/") 3 | #画图:UpSetR 4 | library(UpSetR) 5 | Name <- c("elevation.txt","prec1.txt","prec2.txt","prec3.txt","prec4.txt","prec5.txt","prec6.txt","prec7.txt","prec8.txt","soil11.txt","soil12.txt","soil13.txt","soil2.txt","soil4.txt","soil5.txt","soil6.txt","soil7.txt","soil8.txt","soil9.txt","solar1.txt","solar2.txt","solar3.txt","temp10.txt","temp11.txt","temp1.txt","temp2.txt","temp3.txt","temp4.txt","temp5.txt","temp6.txt","temp7.txt","temp8.txt","temp9.txt") 6 | p <- list() 7 | for(i in c(1:length(Name))){ 8 | data <- read.table(Name[i],header=F,stringsAsFactors = F) 9 | p[[i]] <- data 10 | } 11 | listInput <- list(SF2 = p[[1]][,1], H12 = p[[2]][,1], TajimaD = p[[3]][,1]) 12 | upset(fromList(listInput), order.by = "freq",text.scale = c(2),point.size = 3.5, line.size = 2) 13 | 14 | data <- read.table("/Users/guoyafei/Desktop/adapgene/rep.jaccard.pos.crosstab", header=T, row.names = 1,stringsAsFactors = F) 15 | corrgram(data, order = F,lower.panel = panel.shade,upper.panel = panel.pie,text.panel = panel.txt,gap = 0.1, 16 | main="Correlogram of environment variables intercorrelations") 17 | 18 | data <- read.table("/Users/guoyafei/Desktop/adapgene/rep.num.pos.crosstab", header=T, row.names = 1,stringsAsFactors = F) 19 | corrgram(data, order = F,lower.panel = panel.shade,upper.panel = panel.pie,text.panel = panel.txt,gap = 0.1, 20 | main="Correlogram of environment variables intercorrelations") 21 | 22 | data <- read.table("/Users/guoyafei/Desktop/bio-RDA/471_baypass_taxa.Info", header=T, row.names = 1, stringsAsFactors = F) 23 | sub <- data[,c(3,6:40)] 24 | sub2 <- sub[,c(1,4,8,11,13,24,34,36)] 25 | 26 | corrgram(sub2, order = F,lower.panel = panel.shade,upper.panel = panel.pie,text.panel = panel.txt,gap = 0.1, 27 | main="Correlogram of environment variables intercorrelations") 28 | 29 | library(usdm) 30 | vif(sub) 31 | v1 <- vifcor(sub2,th=0.9) 32 | 33 | v1 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /07_VMap3/36_checksample.R: -------------------------------------------------------------------------------- 1 | setwd("/Users/guoyafei/Documents/02_Vmap3/before0219/14_VMap3QC/QC") 2 | 3 | 4 | -------------------------------------------------------------------------------- /07_VMap3/37_traitgwas.R: -------------------------------------------------------------------------------- 1 | #-----qq---- 2 | library(ggplot2) 3 | library(qqman) 4 | library(tidyverse) 5 | setwd("/Users/guoyafei/Desktop/traitgwas/pdf") 6 | path <- "/Users/guoyafei/Desktop/traitgwas/plot/tasselgwas/cultivar" 7 | fileNames <- dir(path) 8 | filePath <- sapply(fileNames, function(x){ 9 | paste(path,x,sep='/')}) 10 | data <- lapply(filePath, function(x){ 11 | read.table(x, header=T,stringsAsFactors = F)}) 12 | p <- list() 13 | q <- list() 14 | data <- 
read.table("cultivar.test.txt",header=F,stringsAsFactors = F) 15 | colnames(data) <- c("SNP","CHR","BP","P") 16 | for ( i in c(1:7)){ 17 | #qq plot 18 | n1 <- strsplit( names(data)[i], ".shuf50k.")[[1]][1] 19 | filename1 <- paste(n1,"cultivar.tassel.qq",sep=".") 20 | filename2 <- paste(n1,"cultivar.tassel",sep=".") 21 | #colnames(data[[i]]) <- c("CHR","SNP","BP","P") 22 | colnames(data[[i]]) <- c("SNP","CHR","BP","P") 23 | qq_dat <- data.frame(obs=-log10(sort(data$P,decreasing =FALSE)), 24 | exp=-log10( ppoints(length(data$P)))) 25 | q[[i]]q <- ggplot(data=qq_dat,aes(exp,obs))+ 26 | geom_point(alpha=0.7,color="#7F7F7FFF")+ 27 | geom_abline(color="#D62728FF")+ 28 | xlab("Expected -log10(P-value)")+ 29 | ylab("Observed -log10(P-value)")+ 30 | scale_x_continuous(limits = c(0,7))+ 31 | scale_y_continuous(limits = c(0,7))+ 32 | #ggtitle(name)+ 33 | theme( 34 | plot.title = element_text(color="red", size=20, face="bold.italic"), 35 | axis.title = element_text(size=12,face="bold"), 36 | axis.text = element_text(face="bold",size=8,color = "black"), 37 | #axis.line = element_line(size=0.8,color="black"), 38 | axis.ticks= element_line(size=0.8,colour = "black"), 39 | panel.grid =element_blank(), 40 | panel.border = element_rect(fill=NA,size = 0.8), 41 | panel.background = element_blank()) 42 | pdf(paste(filename1,".pdf",sep=""),height = 3,width = 3) 43 | print(q[[i]]) 44 | dev.off() 45 | #manhuttan plot 46 | gwasResults <- data[[i]] 47 | don <- gwasResults %>% 48 | group_by(CHR) %>% 49 | summarise(chr_len=max(BP)) %>% 50 | mutate(tot=cumsum(as.numeric(chr_len))-chr_len) %>% 51 | select(-chr_len) %>% 52 | left_join(gwasResults, ., by=c("CHR"="CHR")) %>% 53 | arrange(CHR, BP) %>% 54 | mutate( BPcum=BP+tot) 55 | axisdf <- don %>% group_by(CHR) %>% summarize(center=( max(BPcum) + min(BPcum) )/ 2) 56 | p[[i]] p <- ggplot(don, aes(x=BPcum, y=-log10(P))) + 57 | geom_point( aes(color=as.factor(CHR)), alpha=0.5, size=1) + 58 | scale_color_manual(values = rep(c("#999999","#999999", "#0072B2", "#0072B2","#E69F00", "#E69F00"), 7)) + 59 | # custom X axis: 60 | scale_x_continuous( label = axisdf$CHR, breaks= axisdf$center ) + 61 | scale_y_continuous(expand = c(0, 0) ) + 62 | theme_bw() + 63 | ylim(0,7)+ 64 | theme( 65 | legend.position="none", 66 | panel.border = element_blank(), 67 | panel.grid.major.x = element_blank(), 68 | panel.grid.minor.x = element_blank(), 69 | axis.text.x=element_text(size=8), 70 | axis.text.y=element_text(size=10), 71 | axis.title.y=element_text(size = 10), 72 | axis.title.x=element_text(size = 10), 73 | )+ 74 | scale_y_continuous(limits = c(0,7))+ 75 | geom_hline(yintercept = -log10(0.00001), colour="red",linetype=2, size=1) 76 | pdf(paste(filename2,".pdf",sep=""),height = 2,width = 9.6) 77 | 78 | print(p[[i]]) 79 | dev.off() 80 | } 81 | 82 | pdf("cultivar.test.pdf",height = 2,width = 9.6) 83 | print(p) 84 | dev.off() 85 | -------------------------------------------------------------------------------- /07_VMap3/38_bayesR.R: -------------------------------------------------------------------------------- 1 | #bayesR 2 | data <- read.table("bayesR.txt", header=F,stringsAsFactors = F) 3 | data2 <- read.table("chrom.txt", header=T,stringsAsFactors = F) 4 | 5 | ggplot(sub, aes(length, Va,color=type)) + 6 | geom_point( size=3)+ 7 | scale_color_manual(values = c("#999999","#0072B2","#E69F00"))+ 8 | theme_bw() 9 | -------------------------------------------------------------------------------- /07_VMap3/46_fastcall2测试.R: 
-------------------------------------------------------------------------------- 1 | #FastCall2 2 | setwd("/Users/guoyafei/Desktop/FC2/compare/2.0") 3 | 4 | library(ggplot2) 5 | input <- paste("chr",1:42,".all.txt",sep="") 6 | output <- paste("chr",1:42,".all.pdf",sep="") 7 | for(i in c(1:42)){ 8 | data <- read.table(input[i], header=F, stringsAsFactors = F) 9 | pdf(output[i]) 10 | p <- ggplot(data,aes(V2,fill=V1))+ 11 | geom_histogram(bins=10,aes(x=V2, y=..density..),position="identity",alpha = 0.5)+ 12 | theme_bw() 13 | plot(p) 14 | dev.off() 15 | } 16 | 17 | 18 | data <- read.table("/Users/guoyafei/Desktop/FC2/compare/2.0_raw/file2.0_raw.all.txt", header=F,stringsAsFactors = F) 19 | ggplot(data,aes(V2,fill=V1))+ 20 | geom_histogram(bins=10,aes(x=V2, y=..density..),position = 'dodge',alpha = 0.9)+ 21 | scale_fill_manual(values = c("#FDC086","#BEAED4")) + 22 | theme_bw() 23 | 24 | data <- read.table("/Users/guoyafei/Desktop/FC2/compare/2.1/file2.1.all.txt", header=F,stringsAsFactors = F) 25 | ggplot(data,aes(V2,fill=V1))+ 26 | geom_histogram(bins=10,aes(x=V2, y=..density..),position = 'dodge',alpha = 0.9)+ 27 | scale_fill_manual(values = c("#FDC086","#BEAED4")) + 28 | theme_bw() 29 | 30 | data <- read.table("/Users/guoyafei/Desktop/FC2/compare/filegatk.all.txt", header=F,stringsAsFactors = F) 31 | ggplot(data,aes(V2,fill=V1))+ 32 | geom_histogram(bins=15,aes(x=V2, y=..density..),position="identity",alpha = 0.5)+ 33 | theme_bw() -------------------------------------------------------------------------------- /07_VMap3/47-物种分布模型.R: -------------------------------------------------------------------------------- 1 | #物种分布模型 2 | library(gradientForest) 3 | library(RColorBrewer) 4 | library(rasterVis) 5 | library(LEA) 6 | library(raster) 7 | library(adegenet) 8 | library(maps) 9 | library(dismo) 10 | library(gplots) 11 | library(gdistance) 12 | 13 | -------------------------------------------------------------------------------- /07_VMap3/48_lassip.R: -------------------------------------------------------------------------------- 1 | library(qqman) 2 | library(tidyverse) 3 | setwd("/Users/guoyafei/Desktop/lassip/pdf") 4 | path <- "/Users/guoyafei/Desktop/lassip/plot" 5 | fileNames <- dir(path) 6 | filePath <- sapply(fileNames, function(x){ 7 | paste(path,x,sep='/')}) 8 | data <- lapply(filePath, function(x){ 9 | read.table(x, header=T,stringsAsFactors = F)}) 10 | 11 | #threshold 12 | #library(gdata) 13 | #thresh <- read.xls("thresh.xlsx",sheet=1,row.name=1,na.strings=c("NA","#DIV/0!")) 14 | for (i in seq(1:length(data))){ 15 | filename <- strsplit(names(data)[i], "_")[[1]][1] 16 | p <- list() 17 | gwasResults <- data[[i]] 18 | colnames(gwasResults)[1:2] <- c("CHR", "BP") 19 | don <- gwasResults %>% 20 | group_by(CHR) %>% 21 | summarise(chr_len=max(BP)) %>% 22 | mutate(tot=cumsum(as.numeric(chr_len))-chr_len) %>% 23 | select(-chr_len) %>% 24 | left_join(gwasResults, ., by=c("CHR"="CHR")) %>% 25 | arrange(CHR, BP) %>% 26 | mutate( BPcum=BP+tot) 27 | axisdf <- don %>% group_by(CHR) %>% summarize(center=( max(BPcum) + min(BPcum) )/ 2) 28 | p[[i]] <- ggplot(don, aes(x=BPcum, y=pop1_T)) + 29 | geom_point( aes(color=as.factor(CHR)), alpha=0.8, size=1.3) + 30 | scale_color_manual(values = rep(c("grey", "#56B4E9", "#D55E00"), 7)) + 31 | # custom X axis: 32 | scale_x_continuous( label = axisdf$CHR, breaks= axisdf$center ) + 33 | scale_y_continuous(expand = c(0, 0) ) + 34 | theme_bw() + 35 | theme( 36 | legend.position="none", 37 | panel.border = element_blank(), 38 | panel.grid.major.x = 
element_blank(), 39 | panel.grid.minor.x = element_blank(), 40 | axis.text.x=element_text(size=8), 41 | axis.text.y=element_text(size=10), 42 | axis.title.y=element_text(size = 10), 43 | axis.title.x=element_text(size = 10), 44 | ) 45 | #scale_y_continuous(limits = c(0,7))+ 46 | #geom_hline(yintercept = -log10(0.0699), colour="red",linetype=2, size=1) 47 | pdf(paste(filename,"_T.pdf",sep=""),height = 3,width = 6.5) 48 | print(p[[i]]) 49 | dev.off() 50 | } 51 | 52 | all <- data[[11]] 53 | ggplot(all, aes(pop1_g2g1, pop1_g123)) + 54 | geom_point(size=2, alpha = 0.5,colour = "grey",shape = 20) + 55 | #geom_point(data = sub_gene1, aes(V2, V3, size=2, alpha = 0.5,colour = "red")) + 56 | geom_vline(xintercept=0.059,color='red',linetype = "dotted")+ 57 | #geom_hline(yintercept=0.059,color='red',linetype = "dotted")+ 58 | #geom_text_repel(data = sub_gene1,aes(V2, V3, label = name),max.overlaps = 100) + 59 | xlab("H2/H1")+ 60 | ylab("H12")+ 61 | theme_bw()+ 62 | theme(legend.position="none",legend.title=element_blank(),axis.text.x = element_text(size = 15), axis.title.x = element_text(size = 15),axis.text.y = element_text(size = 15),axis.title.y = element_text(size = 15)) 63 | 64 | 65 | -------------------------------------------------------------------------------- /07_VMap3/49_RAiSD.R: -------------------------------------------------------------------------------- 1 | library(qqman) 2 | library(tidyverse) 3 | setwd("/Users/guoyafei/Desktop/RAiSD/pdf") 4 | path <- "/Users/guoyafei/Desktop/RAiSD/plot" 5 | fileNames <- dir(path) 6 | filePath <- sapply(fileNames, function(x){ 7 | paste(path,x,sep='/')}) 8 | data <- lapply(filePath, function(x){ 9 | read.table(x, header=F,stringsAsFactors = F)}) 10 | 11 | #threshold 12 | #library(gdata) 13 | #thresh <- read.xls("thresh.xlsx",sheet=1,row.name=1,na.strings=c("NA","#DIV/0!")) 14 | for (i in seq(1:length(data))){ 15 | filename <- strsplit(names(data)[i], "_")[[1]][1] 16 | p <- list() 17 | gwasResults <- data[[i]] 18 | colnames(gwasResults) <- c("CHR", "BP", "P") 19 | don <- gwasResults %>% 20 | group_by(CHR) %>% 21 | summarise(chr_len=max(BP)) %>% 22 | mutate(tot=cumsum(as.numeric(chr_len))-chr_len) %>% 23 | select(-chr_len) %>% 24 | left_join(gwasResults, ., by=c("CHR"="CHR")) %>% 25 | arrange(CHR, BP) %>% 26 | mutate( BPcum=BP+tot) 27 | axisdf <- don %>% group_by(CHR) %>% summarize(center=( max(BPcum) + min(BPcum) )/ 2) 28 | p[[i]] <- ggplot(don, aes(x=BPcum, y=P)) + 29 | geom_point( aes(color=as.factor(CHR)), alpha=0.8, size=1.3) + 30 | scale_color_manual(values = rep(c("grey", "#56B4E9", "#D55E00"), 7)) + 31 | # custom X axis: 32 | scale_x_continuous( label = axisdf$CHR, breaks= axisdf$center ) + 33 | scale_y_continuous(expand = c(0, 0) ) + 34 | theme_bw() + 35 | theme( 36 | legend.position="none", 37 | panel.border = element_blank(), 38 | panel.grid.major.x = element_blank(), 39 | panel.grid.minor.x = element_blank(), 40 | axis.text.x=element_text(size=8), 41 | axis.text.y=element_text(size=10), 42 | axis.title.y=element_text(size = 10), 43 | axis.title.x=element_text(size = 10), 44 | ) 45 | #scale_y_continuous(limits = c(0,7))+ 46 | #geom_hline(yintercept = -log10(0.0699), colour="red",linetype=2, size=1) 47 | pdf(paste(filename,".pdf",sep=""),height = 3,width = 6.5) 48 | print(p[[i]]) 49 | dev.off() 50 | } 51 | 52 | #temp selection regions 53 | library(UpSetR) 54 | 55 | setwd("/Users/guoyafei/Desktop/RAiSD/") 56 | #data <- read.table("temp.all.plot.txt", header=F, stringsAsFactors = F) 57 | 58 | EU <- read.table("temp.EU.gene.plot.txt", 
header=F,stringsAsFactors = F) 59 | EU <- EU[,1] 60 | #EU <- data[,2] 61 | #EU <- EU[which(EU!="0")] 62 | WA <- read.table("temp.WA.gene.plot.txt", header=F,stringsAsFactors = F) 63 | WA <- WA[,1] 64 | #WA <- data[,2] 65 | #WA <- WA[which(WA!="0")] 66 | CA <- read.table("temp.CA.gene.plot.txt", header=F,stringsAsFactors = F) 67 | CA <- CA[,1] 68 | #CA <- data[,3] 69 | #CA <- CA[which(CA!="0")] 70 | SA <- read.table("temp.SA.gene.plot.txt", header=F,stringsAsFactors = F) 71 | SA <- SA[,1] 72 | #SA <- data[,4] 73 | #SA <- SA[which(SA!="0")] 74 | EA <- read.table("temp.EA.gene.plot.txt", header=F,stringsAsFactors = F) 75 | EA <- EA[,1] 76 | #EA <- data[,5] 77 | #EA <- EA[which(EA!="0")] 78 | AM <- read.table("temp.AM.gene.plot.txt", header=F,stringsAsFactors = F) 79 | AM <- AM[,1] 80 | #AM <- data[,6] 81 | #AM <- EA[which(EA!="0")] 82 | 83 | 84 | all <- list( 85 | EU <- EU, 86 | WA <- WA, 87 | CA <- CA, 88 | SA <- SA, 89 | EA <- EA 90 | ) 91 | 92 | names(all) <- c("EU","WA","CA","SA","EA") 93 | upset(fromList(all), order.by = "freq") 94 | 95 | upset(fromList(all), keep.order = TRUE,text.scale = c(1),point.size = 1.5, line.size = 1) 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /07_VMap3/51_LinkNe.R: -------------------------------------------------------------------------------- 1 | #LinkNe 2 | setwd("/Users/guoyafei/Desktop/LinkNe/") 3 | library(ggplot2) 4 | data <- read.table("ne.merge.25pop", header=F,stringsAsFactors = F) 5 | sub <- data[which(data$V1 %in% c(400,200)),] 6 | sub_decline <- sub[which(sub$V6 %in% c("pop1","pop2","pop3","pop4","pop6","pop8","pop9","pop10","pop11","pop13","pop14","pop16","pop17","pop19","pop20","pop22","pop23")),] 7 | 8 | sub <- data[which(data$V1 %in% c(400,200)),] 9 | sub_no_decline <- sub[which(sub$V6 %in% c("pop5","pop7","pop12","pop15","pop18","pop21","pop24","pop25")),] 10 | 11 | pdf("test1.pdf",height=30,width=10) 12 | ggplot(sub_no_decline, aes(V1, V3,type=V6)) + 13 | geom_point() + 14 | geom_line() + 15 | theme_classic()+ 16 | ylim(150,1400)+ 17 | labs(x = 'generation', y = 'Ne') 18 | print(p) 19 | dev.off() 20 | 21 | a <- paste("pop",c(9,16,17,25,2,7,20,19),sep="") 22 | sub <- data[which(data$V3 %in% a),] 23 | b <- paste("pop",c(9,16,17,25,2,7,20,19,15,22,1,5,8,11,6,13,4,10.21),sep="") 24 | sub <- data[which(data$V3 %in% b),] 25 | -------------------------------------------------------------------------------- /07_VMap3/54_density.R: -------------------------------------------------------------------------------- 1 | setwd("/Users/guoyafei/Documents/Vmap3/density") 2 | require(RIdeogram) 3 | 4 | 5 | gene_density <- read.table("all.count.txt", header=T, stringsAsFactors = F) 6 | 7 | wheat_karyotype <- read.table("wheat_karyotype.txt", header=T, stringsAsFactors = F) 8 | ideogram(karyotype = wheat_karyotype, overlaid = gene_density) 9 | convertSVG("chromosome.svg", device = "pdf") 10 | 11 | #wheat_karyotype 12 | #Chr Start End CE_start CE_end 13 | #1 0 248956422 122026459 124932724 14 | #2 0 242193529 92188145 94090557 15 | 16 | #gene_density 17 | #Chr Start End Value 18 | #1 1 1000000 65 19 | #1 1000001 2000000 76 20 | library(ggExtra) 21 | setwd("/Users/guoyafei/Desktop/毕业论文/fig/depth") 22 | 23 | data <- read.table("v3.0.depth.DD.txt", header=T, stringsAsFactors = F) 24 | p <- ggplot() + 25 | geom_point(data = data,aes(x=mean,y=sd),alpha=0.1) + 26 | xlim(0,15)+ 27 | ylim(0,10)+ 28 | theme_bw() 29 | p1 <- ggMarginal(p, type = "density", margins = "both", size = 5, fill = "lightblue", color = 
"blue") 30 | 31 | data <- read.table("v3.2.depth.DD.txt", header=T, stringsAsFactors = F) 32 | p <- ggplot() + 33 | geom_point(data = data,aes(x=mean,y=sd),alpha=0.1) + 34 | xlim(6,14)+ 35 | ylim(3,8)+ 36 | theme_bw() 37 | p2 <- ggMarginal(p, type = "density", margins = "both", size = 5, fill = "lightblue", color = "blue") 38 | 39 | data <- read.table("v3.0.depth.AB.txt", header=T, stringsAsFactors = F) 40 | p <- ggplot() + 41 | geom_point(data = data,aes(x=mean,y=sd),alpha=0.1) + 42 | xlim(0,15)+ 43 | ylim(0,10)+ 44 | theme_bw() 45 | p3 <- ggMarginal(p, type = "density", margins = "both", size = 5, fill = "lightblue", color = "blue") 46 | 47 | data <- read.table("v3.2.depth.AB.txt", header=T, stringsAsFactors = F) 48 | p <- ggplot() + 49 | geom_point(data = data,aes(x=mean,y=sd),alpha=0.1) + 50 | xlim(4,10)+ 51 | ylim(3,8)+ 52 | theme_bw() 53 | p4 <- ggMarginal(p, type = "density", margins = "both", size = 5, fill = "lightblue", color = "blue") 54 | 55 | 56 | data <- read.table("v3.0.depth.ABD.txt", header=T, stringsAsFactors = F) 57 | p <- ggplot() + 58 | geom_point(data = data,aes(x=mean,y=sd),alpha=0.1) + 59 | xlim(0,15)+ 60 | ylim(0,10)+ 61 | theme_bw() 62 | p5 <- ggMarginal(p, type = "density", margins = "both", size = 5, fill = "lightblue", color = "blue") 63 | 64 | data <- read.table("v3.2.depth.ABD.txt", header=T, stringsAsFactors = F) 65 | p <- ggplot() + 66 | geom_point(data = data,aes(x=mean,y=sd),alpha=0.1) + 67 | xlim(5,11)+ 68 | ylim(3,7)+ 69 | theme_bw() 70 | p6 <- ggMarginal(p, type = "density", margins = "both", size = 5, fill = "lightblue", color = "blue") 71 | 72 | pdf("ori.D.pdf", width=4,height=4) 73 | print(p1) 74 | dev.off() 75 | 76 | pdf("filter.D.pdf", width=4,height=4) 77 | print(p2) 78 | dev.off() 79 | 80 | pdf("ori.AB.pdf", width=4,height=4) 81 | print(p3) 82 | dev.off() 83 | 84 | pdf("filter.AB.pdf", width=4,height=4) 85 | print(p4) 86 | dev.off() 87 | 88 | pdf("ori.ABD.pdf", width=4,height=4) 89 | print(p5) 90 | dev.off() 91 | 92 | pdf("filter.ABD.pdf", width=4,height=4) 93 | print(p6) 94 | dev.off() 95 | 96 | -------------------------------------------------------------------------------- /07_VMap3/Lulab_germplasm_Info.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yafei-Guo-coder/DumpNice/183d85aba75932aeacf7484386eb0cae484c7d9b/07_VMap3/Lulab_germplasm_Info.xlsx -------------------------------------------------------------------------------- /07_VMap3/Lulab_germplasm_Storage.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yafei-Guo-coder/DumpNice/183d85aba75932aeacf7484386eb0cae484c7d9b/07_VMap3/Lulab_germplasm_Storage.xlsx -------------------------------------------------------------------------------- /BSAseq/QTLseq_BSA.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yafei-Guo-coder/DumpNice/183d85aba75932aeacf7484386eb0cae484c7d9b/BSAseq/QTLseq_BSA.R -------------------------------------------------------------------------------- /BSAseq/gatk_step1_Index.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # building sequence alginment dictionary, samtools faidx and gatk creatSequenceDictionary 3 | #Usage: sh gatk_step1.sh /path/your_genome.fasta 4 | bwa=/usr/bin/bwa # set where to find software 5 | gatk=/mnt/zhou/hangyuan/miniconda3/bin/gatk 6 | samtools=/usr/local/bin/samtools 7 | 8 | 
#bwa index 9 | reference=$1 10 | time $bwa index "$reference" && echo "** bwa index done! ** " 11 | #samtools index 12 | time $samtools faidx $reference && echo "** samtools faidx done! ** " 13 | 14 | #注意:使用GATK之前,需要先建立参考基因组索引文件.dict和.fai 15 | #.dict中包含了基因组中contigs的名字,也就是一个字典; 16 | #.fai也就是fasta index file,索引文件,可以快速找出参考基因组的碱基,由samtools faidx构建 17 | #构建.dict文件(原来要使用picard的CreateSequenceDictionary模块,但是现在gatk整合了此模块,可以直接使用) 18 | # gatk createSequenceDictionary 19 | time $gatk --java-options "-Xmx100G -Djava.io.tmpdir=./tmp" CreateSequenceDictionary \ 20 | -R "$reference" \ 21 | -O "$reference.dict" \ 22 | && echo "** gatk createSequenceDictionary done! **" -------------------------------------------------------------------------------- /BSAseq/gatk_step2_callvar.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # this is a whole gonome gatk snp calling full process 3 | #Uage: sh gatk_step2.sh sample_1.fastq sample_2.fastq out_dictionary 4 | fastp=/usr/local/bin/fastp 5 | bwa=/usr/bin/bwa 6 | gatk=/mnt/zhou/hangyuan/miniconda3/bin/gatk 7 | samtools=/usr/local/bin/samtools 8 | reference=/mnt/zhou/hangyuan/BSA-seq/P20210613_AtBSA-raw.bam.results_20210621/01.rawData/TAIR10.fa 9 | NTHREADS=30 10 | fq1=$1 11 | fq2=$2 12 | sample=${fq1%%_*} 13 | outdir=$3 14 | outdir=${outdir}/${sample} 15 | 16 | if [ ! -d "$outdir/cleanfq" ] 17 | then mkdir -p "$outdir/cleanfq" 18 | fi 19 | 20 | if [ ! -d "$outdir/bwa" ] 21 | then mkdir -p "$outdir/bwa" 22 | fi 23 | 24 | if [ ! -d "$outdir/gatk" ] 25 | then mkdir -p "$outdir/gatk" 26 | fi 27 | 28 | time $fastp -i "$fq1" \ 29 | -I "$fq2" \ 30 | -o "$outdir/cleanfq/${sample}_1.fastq" \ 31 | -O "$outdir/cleanfq/${sample}_2.fastq" \ 32 | -h "$outdir/cleanfq/${sample}.html" \ 33 | -j "$outdir/cleanfq/${sample}.json" \ 34 | && echo '** fq QC done' 35 | time $bwa mem -M -R "@RG\\tID:${sample}\\tSM:${sample}\\tPL:ILLUMINA" \ 36 | -t $NTHREADS \ 37 | $reference \ 38 | "$outdir/cleanfq/${sample}_1.fastq" "$outdir/cleanfq/${sample}_2.fastq" \ 39 | | $samtools view -bS - > "$outdir/bwa/${sample}.bam"\ 40 | && echo '** BWA MEM done ** ' 41 | time $samtools sort \ 42 | -@ $NTHREADS \ 43 | -O bam \ 44 | -o "$outdir/bwa/${sample}.sorted.bam" \ 45 | "$outdir/bwa/${sample}.bam"\ 46 | && echo "** sorted raw bamfiles done" 47 | time $samtools index "$outdir/bwa/${sample}.sorted.bam" && echo "** ${sample}.sorted.bam index done " 48 | 49 | # 去除PCR重复 50 | time $gatk MarkDuplicates \ 51 | -I "$outdir/bwa/${sample}.sorted.bam" \ 52 | -M "$outdir/bwa/${sample}.markup_metrics.txt" \ 53 | -O "$outdir/bwa/${sample}.sorted.markup.bam" \ 54 | 1> "$outdir/bwa/${sample}.log.mark" 2>&1 \ 55 | && echo "** ${sample}.sorted.bam MarkDuplicates done **" 56 | 57 | # samtools index 去除PCR标记的bam 文件 58 | time $samtools index "$outdir/bwa/${sample}.sorted.markup.bam" \ 59 | && echo " ** ${sample}.sorted.markup.bam index done **" 60 | 61 | #gatk开始:必选 -I -O -R,代表输入、输出、参考 62 | #接下来可以按照字母顺序依次写出来,这样比较清晰 63 | #-bamout:将一整套经过gatk程序重新组装的单倍体基因型(haplotypes)输出到文件 64 | #-stand-call-conf :低于这个数字的变异位点被忽略,可以设成标准30(默认是10) 65 | time $gatk --java-options "-Xmx100G -Djava.io.tmpdir=./tmp" HaplotypeCaller \ 66 | -R $reference \ 67 | -I "$outdir/bwa/${sample}.sorted.markup.bam" \ 68 | -O "$outdir/gatk/${sample}.HC.gvcf.gz" \ 69 | --tmp-dir "$outdir/gatk"\ 70 | --emit-ref-confidence GVCF \ 71 | -stand-call-conf 10 \ 72 | && echo "** GVCF ${sample}.HC.gvcf.gz done ***" -------------------------------------------------------------------------------- 
/BSAseq/gatk_step3_gvcfmerg.sh: -------------------------------------------------------------------------------- 1 | # move the gvcf and tbi file into the directory 2 | gatk=/mnt/zhou/hangyuan/miniconda3/bin/gatk 3 | reference=/mnt/zhou/hangyuan/BSA-seq/P20210613_AtBSA-raw.bam.results_20210621/01.rawData/TAIR10.fa 4 | time $gatk CombineGVCFs \ 5 | -R $reference \ 6 | -V HR.HC.gvcf.gz \ 7 | -V NHR.HC.gvcf.gz \ 8 | -O BSA_with2pools.gvcf.gz 9 | time $gatk GenotypeGVCFs \ 10 | -R $reference \ 11 | -V "BSA_with2pools.gvcf.gz" \ 12 | -O "BSA_no_filter.HC.vcf" -------------------------------------------------------------------------------- /BSAseq/gatk_step4_filter_SNP.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | gatk=/mnt/zhou/hangyuan/miniconda3/bin/gatk 3 | reference=/mnt/zhou/hangyuan/BSA-seq/P20210613_AtBSA-raw.bam.results_20210621/01.rawData/TAIR10.fa 4 | 5 | #select SNP:提取出SNP 6 | time $gatk SelectVariants -R $reference \ 7 | -V BSA_no_filter.HC.vcf \ 8 | -select-type SNP \ 9 | -O BSA_combine_SNPs.vcf 10 | #select INDELS:提取出INDELS 11 | time $gatk SelectVariants -R $reference \ 12 | -V BSA_no_filter.HC.vcf \ 13 | -select-type INDEL \ 14 | -O BSA_combine_INDELs.vcf 15 | 16 | #To filter SNPs:过滤出高质SNPs 17 | time $gatk VariantFiltration \ 18 | -V BSA_combine_SNPs.vcf \ 19 | -filter "MQ < 40.0" \ 20 | --filter-name "MQ_filter_SNP" \ 21 | -filter "FS > 60.0" \ 22 | --filter-name "FS_filter_SNP" \ 23 | -filter "QD < 4.0" \ 24 | --filter-name "QD_filter_SNP" \ 25 | -O BSA_SNPs_filter.vcf 26 | 27 | grep -E '^#|PASS' BSA_SNPs_filter.vcf > BSA_SNPs_filterPASSED.vcf 28 | 29 | #To filter INDELS:过滤出高质量INDELS 30 | time $gatk VariantFiltration \ 31 | -V BSA_combine_INDELs.vcf \ 32 | -filter "QD < 4.0" \ 33 | --filter-name "QD_filter_INDEL" \ 34 | -filter "FS > 200.0" \ 35 | --filter-name "FS_filter_INDEL" \ 36 | -O BSA_INDELs_filter.vcf 37 | 38 | grep -E '^#|PASS' BSA_INDELs_filter.vcf > BSA_INDELs_filterPASSED.vcf 39 | 40 | #合并snp过滤结果和indel过滤结果 41 | time $gatk MergeVcfs \ 42 | -I BSA_INDELs_filterPASSED.vcf \ 43 | -I BSA_SNPs_filterPASSED.vcf \ 44 | -O BSA_filter_merged_SNP_INDEL.vcf 45 | 46 | #This is the number of sites before filtering: 47 | grep -vc "^#" BSA_combine_SNPs.vcf 48 | grep -vc "^#" BSA_combine_INDELs.vcf 49 | #This is the number of sites retained after filtering: 50 | grep -vc "^#" BSA_SNPs_filter.vcf 51 | grep -vc "^#" BSA_INDELs_filter.vcf 52 | 53 | time $gatk VariantsToTable \ 54 | -R $reference \ 55 | -V BSA_filter_merged_SNP_INDEL.vcf \ 56 | -F CHROM -F POS -F REF -F ALT -GF AD -GF DP -GF GQ -GF PL \ 57 | -O BSA.filter.table -------------------------------------------------------------------------------- /Basic_Path.sh: -------------------------------------------------------------------------------- 1 | #jar 2 | 204:/data1/home/yafei/005_Script/Jar/ 3 | #E6 VCF 4 | 204:yafei:/data2/yafei/003_Project3/Vmap1.1/E6/VCF 5 | 204:yafei:/data2/yafei/003_Project3/Vmap1.1/E6/Landrace_locate_225 6 | 204:xuebo:/data2/xuebo/Projects/Speciation/E6/Landrace_locate_225 7 | #RDA 8 | 9 | #做迁徙和环境适应性路径 10 | #RDA 11 | 204:yafei:/data2/yafei/003_Project3/Vmap1.1/E6/Landrace_locate_225/Lineages 12 | #42chr 13 | 204:yafei:/data2/yafei/003_Project3/Vmap1.1/E6 14 | 203:yafei:/data1/home/yafei/008_Software/snpEff/data2 15 | #lineages 16 | 204:yafei:/data2/yafei/003_Project3/Vmap1.1/Out_E6/VCF/VmapE6 17 | #分群 18 | 204:yafei:/data1/home/yafei/003_Project3/Structure/group 19 | #EU_group.txt North2_30_group.txt South_group.txt Tibet.txt 20 | 
#North1_group.txt North2_group.txt South_group_noTibet.txt WA_group.txt 21 | #LFMM 22 | 204@yafei /data1/home/yafei/003_Project3/Structure/LFMM/V2 23 | #XP-clr + smooth 24 | 204:xuebo:/data2/xuebo/Projects/Speciation/xpclr/Selection_V2/smooth/smooth_result/Top5% 25 | 203:xuebo:/data1/home/xuebo/Projects/Speciation/xpclr/Selection_V2 26 | 204:/data2/xuebo/Projects/Speciation/xpclr/Selection_V3 27 | #VIP gene分析 28 | 204:yafei:/data2/yafei/003_Project3/Vmap1.1/E6/Xp-CLR_V2/VIP_genes 29 | 204:yafei:/data2/yafei/003_Project3/Vmap1.1/E6/Xp-CLR_V2 30 | #VCF 变异注释分析 31 | 203:yafei:/data1/home/yafei/008_Software/snpEff/ 32 | SNPeff----52个GA通路相关基因的变异分析 33 | 参考网站:https://www.jianshu.com/p/a6e46d0c07ee(SnpEff使用方法) 34 | https://www.jianshu.com/p/f898ffc4ef48(SnpEff结果解读) 35 | fasta文件: 203: /data1/home/yafei/Project3/HaploType/Fulab/Fa/ 36 | vcf文件: 203: /data1/home/yafei/Project3/HaploType/Fulab/Vcf/ 37 | 软件位置: 203: /data1/home/yafei/Software/snpEff/ 38 | 结果文件: 203: /data1/home/yafei/Software/snpEff/data/ 39 | #XP-CLR negative contral 40 | /Users/guoyafei/Documents/01_个人项目/02_Migration/02_数据表格/01_Vmap1-1/01_Add_ZNdata/05_Environment/XP-CLR/NegativeContral 41 | #IBS with CS 42 | 203:/data2/yafei/003_project3/Project3/CS_Vmap 43 | 计算FST 44 | 203:/data2/yafei/003_project3/Project3/FST_group 45 | Fst_population 46 | 203:/data2/yafei/003_project3/Project3/FST_group/VmapData/FST_selection/Fst_pop 47 | #VMap3数据集 48 | filter1: reliable library 49 | filter2: hetThresh = 0.05; nonmissingThresh = 0.8; macThresh = 2; biallele 50 | java -Xmx300g -jar /data2/yafei/004_Vmap3/Fastcall2/03_Jar/PrivatePlantGenetics.jar 51 | 204:/data4/home/yafei/vcf_AB1/VCF/chr001_VMap3.vcf.gz 52 | 204:/data4/home/yafei/vcf_AB1/Filter2/chr001_VMap3.vcf.gz 53 | #53先导file 54 | 204:/data4/home/yafei/vcf_AB1/vcf/VCF/chr001_VMap3.vcf.gz 55 | #gff文件 56 | 204:/data2/yafei/GeneLulab1_1_LC.gff3 57 | 204:/data2/yafei/gene_v1.1_Lulab.gff3 58 | #VMap3基本统计 59 | 204:/data2/yafei/004_Vmap3/Fastcall2/02_Output/vcf_AB1/Filter2/5k.tree/structure 60 | #DEF-est 61 | 204:/data1/home/yafei/008_Software/dfe-alpha-release-2.16/Test/input/A.1fold_scaled.txt 62 | #bayenv 63 | 204:yafei:/data1/home/yafei/003_Project3/Structure/E6_Landrace_locate_225/bayenv 64 | #BAYENV 65 | 204@yafei:/data1/home/yafei/003_Project3/Structure/bayenv/ENVBAY 66 | #VMap3的group分类 67 | 204@yafei:/data4/home/yafei/vcf_AB1/Merge/Group 68 | -------------------------------------------------------------------------------- /Code/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yafei-Guo-coder/DumpNice/183d85aba75932aeacf7484386eb0cae484c7d9b/Code/.DS_Store -------------------------------------------------------------------------------- /Code/01_Density.r: -------------------------------------------------------------------------------- 1 | #install.packages('RIdeogram') 2 | require(RIdeogram) 3 | 4 | setwd("/Users/guoyafei/Documents/01_Migration/01_BasicStatistic/13_Plots/01_Density") 5 | #gene_density <- read.table("ArinaLrFor_LTR_1.txt", header=T,stringsAsFactors = F) 6 | gene_density2 <- read.table("gene_density.txt", header=T, stringsAsFactors = F) 7 | gene_density <- read.table("21-1M_VMap3_SnpDensity.txt", header=T, stringsAsFactors = F) 8 | wheat_karyotype <- read.table("wheat_karyotype.txt", header=T, stringsAsFactors = F) 9 | ideogram(karyotype = wheat_karyotype, overlaid = gene_density) 10 | convertSVG("chromosome.svg", device = "pdf") 11 | 12 | library(CMplot) 13 | 
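# (added note, not in the original) CMplot with plot.type="d" draws per-chromosome SNP density
# strips; the input needs three columns (SNP name, chromosome, position), as the commented example
# rows under read.table() below show. bin.size=1e4 (10 kb) suits the small FastCall/FastCall2 test
# regions used here; for a whole-genome SNP set a coarser bin (e.g. 1e6) is the more usual choice.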
setwd("/Users/guoyafei/Documents/01_个人项目/02_VmapIII/03_Fastcall2/测试数据") 14 | mydata<-read.table("/Users/guoyafei/Documents/01_个人项目/02_VmapIII/03_Fastcall2/测试数据/fastcall2_001_pos.txt",header=TRUE,sep="\t") 15 | head(mydata) 16 | # snp chr pos 17 | # snp1_1 1 2041 18 | # snp1_2 1 2062 19 | # snp1_3 1 2190 20 | CMplot(mydata,plot.type="d",bin.size=1e4,col=c("darkgreen","yellow", "red"),file="jpg",memo="snp_density",dpi=300) 21 | mydata<-read.table("/Users/guoyafei/Documents/01_个人项目/02_VmapIII/03_Fastcall2/测试数据/fastcall_001_pos.txt",header=TRUE,sep="\t") 22 | head(mydata) 23 | # snp chr pos 24 | # snp1_1 1 2041 25 | # snp1_2 1 2062 26 | # snp1_3 1 2190 27 | CMplot(mydata,plot.type="d",bin.size=1e4,col=c("darkgreen","yellow", "red"),file="jpg",memo="snp_density",dpi=300) -------------------------------------------------------------------------------- /Code/04_FST.r: -------------------------------------------------------------------------------- 1 | #画热图 2 | #文件目录:yafei@203:/data2/yafei/Project3/FST_group/VmapData/heatmap 3 | #A_mean.fst B_mean.fst D_mean.fst AB_mean.fst 4 | #A_weight.fst B_weight.fst D_weight.fst AB_weight.fst 5 | 6 | library("corrplot") 7 | 8 | #weighted FST 9 | 10 | #A lineage 11 | data <- read.table("A_weight.fst",header=T,stringsAsFactors=F,sep="\t") 12 | rownames(data) <- data$Alineage 13 | data<- data[,-1] 14 | data<- as.matrix(data) 15 | pdf("weight_A_fst.pdf",width=30,height=30) 16 | corrplot(data,method = "color",tl.col="black",tl.srt = 45,addrect=4,addCoef.col = "grey",number.cex=4,number.digits=3,tl.cex=2.5,cl.cex=3,cl.lim = c(0, 1)) 17 | dev.off() 18 | 19 | #B lineage 20 | data <- read.table("B_weight.fst",header=T,stringsAsFactors=F,sep="\t") 21 | rownames(data) <- data$Blineage 22 | data<- data[,-1] 23 | data<- as.matrix(data) 24 | pdf("weight_B_fst.pdf",width=30,height=30) 25 | corrplot(data,method = "color",tl.col="black",tl.srt = 45,addrect=4,addCoef.col = "grey",number.cex=4,number.digits=3,tl.cex=2.5,cl.cex=3,cl.lim = c(0, 1)) 26 | dev.off() 27 | 28 | #D lineage 29 | data <- read.table("D_weight.fst",header=T,stringsAsFactors=F,sep="\t") 30 | rownames(data) <- data$Dlineage 31 | data<- data[,-1] 32 | data<- as.matrix(data) 33 | pdf("weight_D_fst.pdf",width=30,height=30) 34 | corrplot(data,method = "color",tl.col="black",tl.srt = 45,addrect=4,addCoef.col = "grey",number.cex=4,number.digits=3,tl.cex=2.5,cl.cex=3,cl.lim = c(0, 1)) 35 | dev.off() 36 | 37 | #AB lineage 38 | data <- read.table("AB_weight.fst",header=T,stringsAsFactors=F,sep="\t") 39 | rownames(data) <- data$ABlineage 40 | data<- data[,-1] 41 | data<- as.matrix(data) 42 | pdf("weight_AB_fst.pdf",width=30,height=30) 43 | corrplot(data,method = "color",tl.col="black",tl.srt = 45,addrect=4,addCoef.col = "grey",number.cex=4,number.digits=3,tl.cex=2.5,cl.cex=3,cl.lim = c(0, 1)) 44 | dev.off() 45 | 46 | #mean FST 47 | 48 | #A lineage 49 | data <- read.table("A_mean.fst",header=T,stringsAsFactors=F,sep="\t") 50 | rownames(data) <- data$Alineage 51 | data<- data[,-1] 52 | data<- as.matrix(data) 53 | pdf("mean_A_fst.pdf",width=30,height=30) 54 | corrplot(data,method = "color",tl.col="black",tl.srt = 45,addrect=4,addCoef.col = "grey",number.cex=4,number.digits=3,tl.cex=2.5,cl.cex=3,cl.lim = c(0, 1)) 55 | dev.off() 56 | 57 | #B lineage 58 | data <- read.table("B_mean.fst",header=T,stringsAsFactors=F,sep="\t") 59 | rownames(data) <- data$Blineage 60 | data<- data[,-1] 61 | data<- as.matrix(data) 62 | pdf("mean_B_fst.pdf",width=30,height=30) 63 | corrplot(data,method = "color",tl.col="black",tl.srt = 
45,addrect=4,addCoef.col = "grey",number.cex=4,number.digits=3,tl.cex=2.5,cl.cex=3,cl.lim = c(0, 1)) 64 | dev.off() 65 | 66 | #D lineage 67 | data <- read.table("D_mean.fst",header=T,stringsAsFactors=F,sep="\t") 68 | rownames(data) <- data$Dlineage 69 | data<- data[,-1] 70 | data<- as.matrix(data) 71 | pdf("mean_D_fst.pdf",width=30,height=30) 72 | corrplot(data,method = "color",tl.col="black",tl.srt = 45,addrect=4,addCoef.col = "grey",number.cex=4,number.digits=3,tl.cex=2.5,cl.cex=3,cl.lim = c(0, 1)) 73 | dev.off() 74 | 75 | #AB lineage 76 | data <- read.table("AB_mean.fst",header=T,stringsAsFactors=F,sep="\t") 77 | rownames(data) <- data$ABlineage 78 | data<- data[,-1] 79 | data<- as.matrix(data) 80 | pdf("mean_AB_fst.pdf",width=30,height=30) 81 | corrplot(data,method = "color",tl.col="black",tl.srt = 45,addrect=4,addCoef.col = "grey",number.cex=4,number.digits=3,tl.cex=2.5,cl.cex=3,cl.lim = c(0, 1)) 82 | dev.off() 83 | -------------------------------------------------------------------------------- /Code/07_PCA.r: -------------------------------------------------------------------------------- 1 | setwd("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/02_Environment/02_XP-CLR/NegativeContral") 2 | library("FactoMineR") 3 | library("ggplot2") 4 | library("factoextra") 5 | Elevation <- read.csv("Elevation.txt",fill=TRUE, row.names = 1,na.strings = "",header = T, sep="\t", stringsAsFactors = F) 6 | Temp <- read.table("Temp.txt", header=T,row.names = 1,stringsAsFactors = F) 7 | Prec <- read.table("Prec.txt",header=T,row.names = 1,stringsAsFactors = F) 8 | bio <- read.table("20bio.txt",header=T,stringsAsFactors = F) 9 | dt <- as.matrix(scale(bio[!duplicated(bio, fromLast=TRUE),])) 10 | rm1<-cor(dt) 11 | rm1 12 | rs1<- eigen(rm1) 13 | #提取结果中的特征值,即各主成分的方差; 14 | val <- rs1$values 15 | 16 | #换算成标准差(Standard deviation); 17 | (Standard_deviation <- sqrt(val)) 18 | #计算方差贡献率和累积贡献率; 19 | (Proportion_of_Variance <- val/sum(val)) 20 | (Cumulative_Proportion <- cumsum(Proportion_of_Variance)) 21 | #碎石图绘制; 22 | par(mar=c(6,6,2,2)) 23 | plot(rs1$values,type="b", 24 | cex=2, 25 | cex.lab=2, 26 | cex.axis=2, 27 | lty=2, 28 | lwd=2, 29 | xlab = "PC", 30 | ylab="Eigenvalue (Principal Component Variance)") 31 | #提取结果中的特征向量(也称为Loadings,载荷矩阵); 32 | (U<-as.matrix(rs1$vectors)) 33 | #进行矩阵乘法,获得PC score; 34 | PC <- dt %*% U 35 | colnames(PC) <- paste("PC",1:20,sep="") 36 | head(PC) 37 | 38 | #将iris数据集的第5列数据合并进来; 39 | #df<-data.frame(PC, dt_all$ploidy, dt_all$Region) 40 | #head(df) 41 | 42 | library(ggplot2) 43 | #提取主成分的方差贡献率,生成坐标轴标题; 44 | xlab<-paste0("PC1(",round(Proportion_of_Variance[1]*100,2),"%)") 45 | ylab<-paste0("PC2(",round(Proportion_of_Variance[2]*100,2),"%)") 46 | #绘制散点图并添加置信椭圆; 47 | p1<-ggplot(data = df,aes(x=PC1,y=PC2,color=df$dt_all.ploidy))+ 48 | stat_ellipse(aes(fill=df$dt_all.ploidy), 49 | type ="norm", geom ="polygon",alpha=0.2,color=NA)+ 50 | geom_point()+labs(x=xlab,y=ylab,color="")+ 51 | guides(fill=F)+ 52 | theme_classic() 53 | p2<-ggplot(data = PC,aes(x=PC1,y=PC2))+ 54 | geom_point()+labs(x=xlab,y=ylab,color="")+ 55 | guides(fill=F)+ 56 | theme_classic() 57 | p1 58 | p2 59 | 60 | #------ 61 | dt_all$lat_bin <- NA 62 | dt_all[which(dt_all$lat >= -45 & dt_all$lat < -35),24] <- "[-45,-35)" 63 | dt_all[which(dt_all$lat >= -35 & dt_all$lat < -25),24] <- "[-35,-25)" 64 | dt_all[which(dt_all$lat >= -25 & dt_all$lat < -15),24] <- "[-25,-15)" 65 | dt_all[which(dt_all$lat >= -15 & dt_all$lat < -5),24] <- "[-15,-5)" 66 | dt_all[which(dt_all$lat >= -5 & dt_all$lat < 5),24] <- "[-5,5)" 67 | 
dt_all[which(dt_all$lat >= 5 & dt_all$lat < 15),24] <- "[5,15)" 68 | dt_all[which(dt_all$lat >= 15 & dt_all$lat < 25),24] <- "[15,25)" 69 | dt_all[which(dt_all$lat >= 25 & dt_all$lat < 35),24] <- "[25,35)" 70 | dt_all[which(dt_all$lat >= 35 & dt_all$lat < 45),24] <- "[35,45)" 71 | dt_all[which(dt_all$lat >= 45 & dt_all$lat < 55),24] <- "[45,55)" 72 | dt_all[which(dt_all$lat >= 55 & dt_all$lat < 65),24] <- "[55,65)" 73 | dt_all[which(dt_all$lat >= 65 & dt_all$lat < 75),24] <- "[65,75)" 74 | df$lat_bin <- dt_all$lat_bin 75 | p2<-ggplot(data = df,aes(x=PC1,y=PC2,color=df$lat_bin))+ 76 | stat_ellipse(aes(fill=df$dt_all.Region), 77 | type ="norm", geom ="polygon",alpha=0.2,color=NA)+ 78 | geom_point()+labs(x=xlab,y=ylab,color="")+ 79 | guides(fill=F)+ 80 | theme_classic() 81 | p1 82 | -------------------------------------------------------------------------------- /Code/09_Admixture.sh: -------------------------------------------------------------------------------- 1 | #Filter by MAF > 0.05 and missing rate < 0.1 2 | plink --vcf test.imputed.vcf --maf 0.05 --geno 0.1 --recode vcf-iid --out test.filter --allow-extra-chr --double-id --autosome-num 42 3 | #--keep-only-indels (stray leftover option, not part of the command above) 4 | #LD pruning of the VCF 5 | plink --vcf test.filter.vcf --indep-pairwise 50 10 0.2 --out test.filterLD --allow-extra-chr --double-id --autosome-num 42 6 | 7 | #Extract the pruned SNP set 8 | #plink --vcf test.filter.vcf --make-bed --extract test.filterLD.prune.in --out test.filter.prune.in --double-id --autosome-num 42 9 | plink --vcf test.filter.vcf --extract test.filterLD.prune.in --recode vcf-iid --out test.filter.prune.in --double-id --autosome-num 42 10 | 11 | #Convert to structure/admixture input formats (the --bfile calls below expect the binary fileset from the commented --make-bed command above) 12 | plink --bfile test.filter.prune.in --recode structure --out test.filter.prune.in #produces .recode.strct_in, the structure input format 13 | plink --bfile test.filter.prune.in --recode 12 --out test.filter.prune.in --autosome-num 42 #produces .ped, the admixture input format 14 | plink --bfile test.filter.prune.in --recode vcf --out test.filter.prune.in --autosome-num 42 15 | 16 | #admixture 17 | admixture --cv test.filter.ped 1 >>log.txt 18 | admixture --cv test.filter.ped 2 >>log.txt 19 | ....... 
20 | admixture --cv test.filter.ped 13 >>log.txt 21 | wait 22 | grep "CV error" log.txt > k_1to13 23 | -------------------------------------------------------------------------------- /Code/10_Xp-clr.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | sleep 4800s 3 | for i in {1..42} 4 | do 5 | WGS --model vcf --type toXPCLR --group1 ../groupTaxa/EA.txt --rec ../../slidewindow_recomrate_updown_20M.txt --chr $i --file /data2/xuebo/Projects/Speciation/E6/chr${i}.Land.vcf.gz --out groupEAChr${i} & 6 | done 7 | 8 | #!/bin/bash 9 | for i in {1..42} 10 | do 11 | XPCLR -xpclr ../groupEA/groupEAChr${i}.geno ../groupWA/groupWAChr${i}.geno ../groupWA/groupWAChr${i}.snp EA_WA_10kchr${i} -w1 0.005 500 10000 $i -p1 0.95 & 12 | done 13 | -------------------------------------------------------------------------------- /Code/14_Xp-clr_Haplotype.sh: -------------------------------------------------------------------------------- 1 | #working directory 2 | #204:yafei:/data2/yafei/003_Project3/Vmap1.1/E6/Xp-CLR_V2 3 | 4 | #extract gene_region VCF from 225 all vcf files 5 | grep -w -f 87Gene_id.txt gene_v1.1_Lulab.gff3 | awk -F";" '{print $1}' |awk '{print $1"\t"$4-5000"\t"$5+5000"\t"$9}' | awk -F"\tID=" '{print $2"\t"$1}' > 87gene_5k.txt 6 | #bash getVcf.sh 87gene_5k.txt 7 | cat $1 |while read gene chr from to 8 | do 9 | #echo $chr $from $to 10 | #if echo $2 |grep -q '.*.vcf.gz$';then 11 | # vcftools --gzvcf $2 --chr $chr --from-bp $from --to-bp $to --recode --recode-INFO-all --out $gene.$chr.$from-$to 12 | #elif echo $2 |grep -q '.*.vcf$';then 13 | vcftools --gzvcf /data2/yafei/003_Project3/Vmap1.1/E6/Landrace_locate_225/chr$chr.E6_Landrace_locate.vcf.gz --chr $chr --from-bp $from --to-bp $to --recode --recode-INFO-all --out 5k.$gene.$chr.$from-$to 14 | #fi 15 | done 16 | 17 | #传到本地用Java Migration/Haplotype将vcf转换成单倍型txt格式,使用07_VCF_Haplotype_Visual.r进行可视化。 18 | #本地路径:/Users/guoyafei/Documents/01_个人项目/02_Migration/02_数据表格/01_Vmap1-1/01_Add_ZNdata/05_Environment/XP-CLR/Gene/V2/TXT -------------------------------------------------------------------------------- /Code/15_Xp-clr_NegativeContral.r: -------------------------------------------------------------------------------- 1 | #XP-CLR negative contral 2 | library(RColorBrewer) 3 | library(ggplot2) 4 | setwd("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/02_Environment/02_XP-CLR/NegativeContral") 5 | dist <- read.table("dis_matrix.txt",header=T,stringsAsFactors = F) 6 | out <- strsplit(dist[,1], ":") 7 | dist$ID1 <- NA 8 | dist$ID2 <- NA 9 | a<-1 10 | for (i in c(1:length(out))){ 11 | dist[a,6] <- out[[i]][1] 12 | dist[a,7] <- out[[i]][2] 13 | a <- a+1 14 | } 15 | #dim(dist[which(dist$geopolitical_dis>100 & dist$TEMP_dis<10),]) 16 | WA <- read.table("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/02_Environment/02_XP-CLR/NegativeContral/group_V2/WA.txt",header =F,stringsAsFactors = F) 17 | North1 <- read.table("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/02_Environment/02_XP-CLR/NegativeContral/group_V2/North1.txt",header=F,stringsAsFactors = F) 18 | North2 <- read.table("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/02_Environment/02_XP-CLR/NegativeContral/group_V2/North2.txt",header=F,stringsAsFactors = F) 19 | #Temperature 20 | pdf("Temperature_dist.pdf",height = 10,width = 10) 21 | #WA VS North1 22 | dist$Type1 <- NA 23 | dist[which((dist$ID1 %in% WA[,1] & dist$ID2 %in% North1[,1])) ,8] <- "Yes" 24 | dist[which((dist$ID1 %in% North1[,1] & 
dist$ID2 %in% WA[,1])) ,8] <- "Yes" 25 | p <- ggplot(dist, aes(geopolitical_dis,TEMP_dis, color=Type1)) + 26 | geom_point( size=3,alpha=0.2)+ 27 | theme_classic()+ 28 | ggtitle("WA VS North1")+ 29 | theme(plot.title = element_text(color="red", size=20, face="bold.italic"),legend.text = element_text(size=20),legend.title=element_blank(),axis.text.x = element_text(size = 25), axis.title.x = element_text(size = 25),axis.text.y = element_text(size = 25),axis.title.y = element_text(size = 25)) 30 | print(p) 31 | #WA VS North2 32 | dist$Type1 <- NA 33 | dist[which((dist$ID1 %in% WA[,1] & dist$ID2 %in% North2[,1])) ,8] <- "Yes" 34 | dist[which((dist$ID1 %in% North2[,1] & dist$ID2 %in% WA[,1])) ,8] <- "Yes" 35 | 36 | p <- ggplot(dist, aes(geopolitical_dis,TEMP_dis, color=Type1)) + 37 | geom_point( size=3,alpha=0.2)+ 38 | theme_classic()+ 39 | ggtitle("WA VS North2")+ 40 | theme(plot.title = element_text(color="red", size=20, face="bold.italic"),legend.text = element_text(size=20),legend.title=element_blank(),axis.text.x = element_text(size = 25), axis.title.x = element_text(size = 25),axis.text.y = element_text(size = 25),axis.title.y = element_text(size = 25)) 41 | 42 | print(p) 43 | 44 | #North1 VS North2 45 | dist$Type1 <- NA 46 | dist[which((dist$ID1 %in% North2[,1] & dist$ID2 %in% North1[,1])),8] <- "Yes" 47 | dist[which((dist$ID1 %in% North1[,1] & dist$ID2 %in% North2[,1])),8] <- "Yes" 48 | 49 | p <- ggplot(dist, aes(geopolitical_dis,TEMP_dis, color=Type1)) + 50 | geom_point( size=3,alpha=0.2)+ 51 | theme_classic()+ 52 | ggtitle("North1 VS North2")+ 53 | theme(plot.title = element_text(color="red", size=20, face="bold.italic"),legend.text = element_text(size=20),legend.title=element_blank(),axis.text.x = element_text(size = 25), axis.title.x = element_text(size = 25),axis.text.y = element_text(size = 25),axis.title.y = element_text(size = 25)) 54 | print(p) 55 | dev.off() 56 | -------------------------------------------------------------------------------- /Code/17_Sample_Cluster.r: -------------------------------------------------------------------------------- 1 | library(cluster) 2 | library(factoextra) 3 | #Clustering 4 | #Cluster samples by latitude and longitude 5 | #data <- read.table("land.txt", header=T, sep="\t", stringsAsFactors = F) 6 | #dataA <- data[,c(3,4)] 7 | #data2 <- dataA[!is.na(dataA$Latitude),] 8 | 9 | data <- read.table("/Users/guoyafei/Documents/01_Migration/02_Environment/04_bayenv/225env.txt", header=F,stringsAsFactors = F) 10 | colname <- c("elevation","temp1","temp2","temp3","temp4","temp5","temp6","temp7","temp8","temp9","temp10","temp11","prec1","prec2","prec3","prec4","prec5","prec6","prec7","prec8","Latitude","Logititude") 11 | rownames(data) <- data[,1] 12 | data2 <- data[!is.na(data$V6),-1] 13 | colnames(data2) <- colname 14 | df = scale(data2,center = T,scale = T) 15 | colnames(df) <- colname 16 | #Standardize column by column (centre and scale) 17 | #First compute pairwise dissimilarities between samples 18 | result <- dist(df, method = "euclidean") 19 | #Uses the specified metric to compute distances between the rows of the data matrix 20 | #euclidean: Euclidean distance 21 | #maximum: maximum distance 22 | #manhattan: absolute (city-block) distance 23 | #canberra: Canberra distance 24 | #minkowski: Minkowski distance 25 | #Build the hierarchical tree 26 | result_hc <- hclust(d = result, method = "ward.D2") 27 | #Ward: the minimum-variance method aims for compact, spherical clusters; the complete-linkage method finds similar clusters. 28 | #Two algorithms are available: "ward.D" (the old "ward" option, which does not implement Ward's (1963) clustering criterion) and "ward.D2" (Murtagh & Legendre 2014), which squares the dissimilarities before updating the clusters. Note that agnes(*, method="ward") corresponds to hclust(*, "ward.D2"). 
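# (added sketch, not in the original) Before fixing k=30 in cutree() below, a quick sanity check on
# the number of clusters is the silhouette profile from factoextra (already loaded above); df is the
# scaled variable matrix built earlier in this script and k.max=40 is an arbitrary upper bound:
fviz_nbclust(df, FUNcluster = hcut, method = "silhouette", k.max = 40)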
29 | #Methods median and centroid do not lead to a monotone distance measure; equivalently, the resulting dendrograms can show so-called inversions or reversals. 30 | 31 | data2$type <- cutree(result_hc, k=30) 32 | lat_mean <- tapply(data2[,21],data2$type,mean,na.rm = TRUE) 33 | lon_mean <- tapply(data2[,22],data2$type,mean,na.rm = TRUE) 34 | data2$cluster1 <- NA 35 | data2$cluster2 <- NA 36 | for(i in 1:224) { 37 | for(j in 1:30){ 38 | if(data2[i,23] == j ){ 39 | data2[i,24] <- as.numeric(lat_mean[j]) 40 | data2[i,25] <- as.numeric(lon_mean[j]) 41 | } 42 | } 43 | } 44 | write.table(data2,"30_cluster.txt",sep="\t",row.names = F,quote=F) 45 | 46 | library(maps) 47 | mp<-NULL 48 | mapworld<-borders("world",colour = "gray70",fill="gray70") 49 | mp<-ggplot()+mapworld+ylim(-90,90) 50 | mp_40<-mp+geom_point(aes(x=re$Longitude_40, y=re$Latitude_40))+ 51 | scale_size(range=c(1,1))+ 52 | theme_classic() -------------------------------------------------------------------------------- /Code/19_Qmatrix_PieMap.r: -------------------------------------------------------------------------------- 1 | #Global settings: annotation content and colours 2 | library(pheatmap) 3 | require(reshape) 4 | require (rworldmap) 5 | require(rworldxtra) 6 | library(RColorBrewer) 7 | setwd("/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Gene/V5") 8 | annotation_col <- read.table("/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Taxa_Region_225.txt",header=T,stringsAsFactors = F) 9 | rownames(annotation_col) = c(1:325) 10 | seq <- annotation_col[,1] 11 | #cluster samples: grouped by the regions used for the XP-CLR comparisons 12 | out <- annotation_col[,c(4,5,9)] 13 | lat_mean <- tapply(out[,1],out$Region_sub2,mean,na.rm = TRUE) 14 | lon_mean <- tapply(out[,2],out$Region_sub2,mean,na.rm = TRUE) 15 | out$cluster1 <- NA 16 | out$cluster2 <- NA 17 | for(i in 1:225) { 18 | for(j in names(lat_mean)){ 19 | if(out[i,3] == j ){ 20 | out[i,4] <- as.numeric(lat_mean[j]) 21 | out[i,5] <- as.numeric(lon_mean[j]) 22 | } 23 | } 24 | } 25 | mode <- as.data.frame(cbind(annotation_col[,2],as.numeric(out[,4]),as.numeric(out[,5])),stringsAsFactors = F) 26 | mode$V2 <- as.numeric(mode$V2) 27 | mode$V3 <- as.numeric(mode$V3) 28 | #map 29 | path <- "/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Gene/V5/TXT" ##input directory 30 | fileNames <- dir(path) ##file names under this path 31 | filePath <- sapply(fileNames, function(x){ 32 | paste(path,x,sep='/')}) ##build full file paths for reading 33 | data <- lapply(filePath, function(x){ 34 | read.table(x, header=F,stringsAsFactors = F)}) 35 | pdf("cut_8_piemap.pdf") 36 | for (i in c(1:82)){ 37 | all <- as.matrix(data[[i]]) 38 | colnames(all) <- c(1:225) 39 | all <- all[,seq] 40 | data.e <- dist(t(all)) 41 | model <- hclust(data.e,method = "complete") 42 | #plot(model) 43 | result <- as.numeric(cutree(model, k=8)) 44 | out2 <- as.data.frame(cbind(mode,result)) 45 | out2$value <- 1 46 | colnames(out2)[1:5] <- c("type","Latitude", "Logititude", "Sub.population.2", "value") 47 | wheat2 = out2[!is.na(out2$Latitude),] 48 | wheat = wheat2[!is.na(wheat2$Sub.population.2),] 49 | wheat_reshape <- cast(wheat,Latitude+Logititude~Sub.population.2) 50 | wheat_reshape2 <- as.data.frame(wheat_reshape) 51 | mapPies(wheat_reshape2,xlim=c(-120,140),ylim=c(0,40),nameX="Logititude",nameY="Latitude",nameZs=c("1","2","3","4","5","6","7","8"),symbolSize=1, 52 | zColours=brewer.pal(8, "Set2"),barOrient='vert',oceanCol="#D1EEEE",landCol="#FFDAB9",main=i) 53 | } 54 | dev.off() -------------------------------------------------------------------------------- /Code/20_Qmatrix_Visual.r: -------------------------------------------------------------------------------- 1 | #Global settings: annotation content and colours 2 | library(pheatmap) 3 
| require(reshape) 4 | require (rworldmap) 5 | require(rworldxtra) 6 | library(pophelper) 7 | library(ggplot2) 8 | require(gridExtra) 9 | setwd("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/01_BasicStatistic/06_Structure/V1/Select") 10 | #load Qmatrix files 11 | sfiles <- list.files(path="/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/01_BasicStatistic/06_Structure/V1/Select/QMatrix", full.names=T) 12 | slist <- readQ(files=sfiles) 13 | tabulateQ(qlist=readQ(sfiles)) 14 | summariseQ(tabulateQ(qlist=readQ(sfiles))) 15 | 16 | pdf("Q.pdf",width = 24,height = 8) 17 | p1 <- plotQ(slist[1:7],returnplot=T,exportplot=F,quiet=T,basesize=11, 18 | sortind="all",showindlab=F,showyaxis=T,showticks=T,sharedindlab=T,clustercol=brewer.pal(7, "Set2")) 19 | grid.arrange(p1$plot[[1]],p1$plot[[2]],p1$plot[[3]],p1$plot[[4]],p1$plot[[5]],nrow=5) 20 | dev.off() 21 | -------------------------------------------------------------------------------- /Code/21_PCA_climate.r: -------------------------------------------------------------------------------- 1 | #working directory 2 | #/Users/guoyafei/Documents/01_个人项目/02_Migration/02_数据表格/01_Vmap1-1/01_Add_ZNdata/05_Environment/XP-CLR/NegativeContral 3 | library(RColorBrewer) 4 | library(ggmap) 5 | setwd("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/02_Environment/02_XP-CLR/NegativeContral") 6 | location <- read.table("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/01_BasicStatistic/01_IBS/04_795taxaIBS/795_Location.txt",header=T,stringsAsFactors = F) 7 | 8 | Elevation <- read.table("Elevation.txt",header=T,stringsAsFactors = F) 9 | ele <- merge(Elevation,location,by="ID") 10 | colnames(ele)[2] <-"PC1" 11 | 12 | Temp <- read.table("temp_PC.txt",header=T,stringsAsFactors = F) 13 | Temp$ID <- rownames(Temp) 14 | tem <- merge(Temp,location,by="ID") 15 | 16 | Prec <- read.table("Prec_PC.txt",header=T,stringsAsFactors = F) 17 | Prec$ID <- rownames(Prec) 18 | pre <- merge(Prec,location,by="ID") 19 | 20 | bio <- read.table("20bio_PC.txt",header=T,row.names = 1) 21 | #bio$ID <- rownames(bio) 22 | #bio <- merge(bio,location,by="ID") 23 | 24 | data <- list(ele,tem,pre) 25 | pdf("20bio_PC1.pdf",width = 12,height = 7.5) 26 | for(i in c(1:2)){ 27 | mp <- NULL 28 | mapworld <- borders("world",colour = "grey90",fill="white") 29 | mp <- ggplot()+mapworld+ylim(-60,90)+xlim(-25,200) 30 | mp2 <- mp+geom_point(aes(x=bio$Logititude,y=bio$Latitude,color=bio$PC1),size=2.6)+scale_size(range=c(1,1))+ 31 | scale_colour_gradientn(colours = brewer.pal(11, "RdYlBu"))+ 32 | theme(legend.title=element_blank(),axis.text.x = element_text(size = 25), axis.title.x = element_text(size = 25),axis.text.y = element_text(size = 25),axis.title.y = element_text(size = 25))+ 33 | theme(panel.grid = element_blank()) + 34 | #theme(panel.grid =element_blank()) + ## 删去网格线 35 | #theme(axis.text = element_blank()) + ## 删去刻度标签 36 | #theme(axis.ticks = element_blank()) + ## 删去刻度线 37 | theme(panel.border = element_blank()) 38 | print(mp2) 39 | } 40 | dev.off() 41 | 42 | 43 | #看筛选样本的分布位点 44 | #taxa <- read.table("group_V2/max_temp.txt",header=F,stringsAsFactors = F) 45 | #colnames(taxa)[1] <-"ID" 46 | #max <- merge(taxa,location,by="ID") 47 | 48 | -------------------------------------------------------------------------------- /Code/22_VIP_Gene_XpCLR_signal.r: -------------------------------------------------------------------------------- 1 | library(qqman) 2 | #画信号位点基因的XP-CLR的染色体信号 3 | #Working directory 4 | 
#xuebo@204:/data2/xuebo/Projects/Speciation/xpclr/Selection_V3/smooth/lineage_V2/Top5%/VIP_gene_XpCLR 5 | #VIP_gene 6 | #1. TraesCS4D02G364400 Vrn2-2 24 58277633 58279728 chr4D:509282253-509284348(-) Transcription factor GHD7 [UniProtKB/Swiss-Prot:E5RQA1] Os10g0560400 NA 7 | #2. TraesCS2D02G079600 Ppd-1(PRR) 11 33952048 33956269 chr2D:33952048-33956269(-) Two-component response regulator-like PRR37 [UniProtKB/Swiss-Prot:A2YQ93] Os07g0695100 NA 8 | #3. TraesCS2A02G081900 Ppd-A1 7 36933684 36938202 chr2A:36933684-36938202(-) Two-component response regulator-like PRR37 [UniProtKB/Swiss-Prot:A2YQ93] Os07g0695100 NA 9 | #4. TraesCS5A02G473800 Q-5A 26 196896739 196900381 chr5A:650127258-650130900(-) APETALA2-like protein 2 [UniProtKB/Swiss-Prot:Q84TB5] Os03g0818800 NA 10 | #5. TraesCS4B02G043100 Rht-B1 21 30861268 30863723 chr4B:30861268-30863723(+) DELLA protein RHT-1 [UniProtKB/Swiss-Prot:Q9ST59] Os03g0707600 AT1G66350 11 | #6. TraesCS1D02G029100 Sr33 5 11451423 11459353 chr1D:11451423-11459353(+) Disease resistance protein RGA5 [UniProtKB/Swiss-Prot:F7J0N2] NA AT3G46530 12 | #7. TraesCS1D02G040400 Sr45 5 19341296 19346065 chr1D:19341296-19346065(+) Putative disease resistance protein RGA4 [UniProtKB/Swiss-Prot:Q7XA39] Os10g0130800 N 13 | 14 | #File Gene_Name Chr Start Stop 15 | #North2_South_smooth_A Ppd-A1 7 36933684 36938202 16 | #North2_South_smooth_B Rht-B1 21 30861268 30863723 17 | #North2_South_smooth_D Sr45 5 19341296 19346065 18 | #WA_EU_smooth_A Q-5A 26 196896739 196900381 19 | #WA_EU_smooth_D Sr45 5 19341296 19346065 20 | #EU_South_smooth_B Rht-B1 21 30861268 30863723 21 | #EU_South_smooth_D Flowering1 24 58277633 58279728 22 | #EU_South_smooth_D Sr45 5 19341296 19346065 23 | #WA_South_smooth_D Flowering1 24 58277633 58279728 24 | #WA_South_smooth_D Sr33 5 11451423 11459353 25 | #Tibet_South_smooth_D Ppd-1(PRR) 11 33952048 33956269 26 | 27 | cat $1 |while read file chr1 chr2 28 | do 29 | awk '{if($1== "'${chr1}'" || $1=="'${chr2}'") {print $0}}' ../../${file}| sort -k1,1n -k2,2g |sed '1i Chr\tWindowStart\tWindowStop\tSNPcount\tMeanY\tWstat' > ${file::-4}.${chr1}.${chr2}.txt 30 | done 31 | 32 | #把42条染色体合并成21条 33 | #转移到本地画图 34 | #/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Xpclr/V3 35 | setwd("/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Xpclr/V3") 36 | #批量读取Xp-clr结果文件 37 | path <- "/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Xpclr/V3/TXT" ##文件目录 38 | fileNames <- dir(path) ##获取该路径下的文件名 39 | filePath <- sapply(fileNames, function(x){ 40 | paste(path,x,sep='/')}) ##生成读取文件路径 41 | data <- lapply(filePath, function(x){ 42 | read.table(x, header=T,stringsAsFactors = F)}) 43 | 44 | #读取阈值文件 45 | thresHold <- read.table("thresHold.txt",header=T,stringsAsFactors = F) 46 | rownames(thresHold) <- thresHold$GEO_region 47 | #读取着丝粒文件 48 | centromer <- read.table("centromerer.txt",header=T,stringsAsFactors = F) 49 | rownames(centromer) <- centromer$chr 50 | 51 | #读取VIP基因文件 52 | gene <- read.table("VIP_gene.txt",header=T,stringsAsFactors = F) 53 | rownames(gene) <- paste(gene$File,gene$Position,sep=".") 54 | out1 <- strsplit(sub('.chr',':chr', names(data)), ":") 55 | out2 <- strsplit(sub('.txt',':txt', names(data)), ":") 56 | 57 | pdf("Xp-clr.pdf",width = 10,height = 5) 58 | for(i in c(1:length(data))){ 59 | name <- out1[[i]][1] 60 | tit <- out2[[i]][1] 61 | chr <- strsplit(out1[[i]][2], ".txt")[[1]][1] 62 | title <- paste(gene[tit,1],gene[tit,2],sep=":") 63 | cen <- centromer[chr,4] 64 | plot(data[[i]]$WindowStart/1000000, data[[i]]$MeanY, 
ylim = c(-2,50),axes = T,col = "skyblue",type="l",cex = 2,lwd = 3,ann = F) 65 | abline(v = gene[tit,7]/1000000, col = "red", cex=2,lwd = 1) 66 | abline(h=thresHold[name,3], col = "red", lty = 3,cex=2,lwd = 2) 67 | title(title,xlab= chr, ylab = 'Xp-clr', font.lab = 1, cex.lab = 1.5) 68 | points(cen/1000000,0, pch=20,cex=2,col="grey") 69 | } 70 | dev.off() 71 | -------------------------------------------------------------------------------- /Code/23_Gene_HaploType_Map.r: -------------------------------------------------------------------------------- 1 | #work directory: 2 | #/Users/guoyafei/Documents/01_Migration/01_BasicStatistic/06_Structure/sample_group.txt 3 | #/data1/home/yafei/003_Project3/bayenv/13pop/candidate_gene 4 | #/data2/yafei/003_Project3/Vmap1.1/E6/Landrace_locate_225 5 | #Rht-B1: 6 | #TraesCS4B02G043100 7 | #chr21 start:30861268 end:30863723 8 | #ppd-D1 9 | #TraesCS2D01G079600 10 | #chr11 start:33952048 end:33956269 11 | cat *cloned.gene | sort | uniq -c | awk '{print $2"\t"$3}' > top5.candidate.txt 12 | awk '{print $2}' top5.candidate.txt > test 13 | grep -f test gene_v1.1_Lulab.gff3 |awk '{print $1"\t"$4"\t"$5"\t"$9}' |awk -F";" '{print $1}' |sed 's/ID=//' > test2 14 | awk 'NR==FNR{a[$2]=$1;b[$2]=$2} NR!=FNR {if($4 in b) {print $0"\t"a[$4]}}' top5.candidate.txt test2 | awk '{print $1"\t"$2-5000"\t"$3+5000"\t"$4"\t"$5}' |sed '1i Chr\tstart-5k\tend+5k\tID\tName' > gene_region.txt 15 | 16 | cat gene_region.txt |while read chr from to ID Name 17 | do 18 | if [ $chr != "Chr" ];then 19 | bcftools filter /data2/yafei/003_Project3/Vmap1.1/E6/Landrace_locate_225/chr${chr}.E6_Landrace_locate.vcf.gz --regions ${chr}:${from}-${to} > ${ID}-${Name}.vcf 20 | fi 21 | done 22 | for i in `ls *vcf` 23 | do 24 | 25 | vcftools --vcf $i --012 26 | paste out.012.pos <(cat out.012 | datamash transpose | sed '1d' ) > ${i::-4}.vcf.txt 27 | cat out.012.indv | datamash transpose | awk '{print "File\tChr\tPos\t"$0}' > indi.txt 28 | rm out.012* 29 | done 30 | for i in `ls *vcf.txt` 31 | do 32 | awk '{print FILENAME"\t"$0}' $i | sed 's/.vcf.txt//g' > ${i::-4}.vcf.txt2 33 | done 34 | cat indi.txt *vcf.txt2 > all.pos.txt 35 | rm TraesCS* 36 | 37 | library(reshape) 38 | library(ggplot2) 39 | library(RColorBrewer) 40 | require (rworldmap) 41 | setwd("/Users/guoyafei/Documents/01_Migration/01_BasicStatistic/06_Structure/") 42 | data <- read.table("/Users/guoyafei/Documents/01_Migration/02_Environment/01_RDA_plot/225.taxa.txt", header=T,row.names = 1, sep="\t", stringsAsFactors = F) 43 | taxa <- read.table("/Users/guoyafei/Documents/01_Migration/01_BasicStatistic/06_Structure/20210829/Cluster_Location/cluster_70.txt",header=T,row.names = 1, stringsAsFactors = F) 44 | for(i in names(table(data$File))){ 45 | sub <- data[which(data$File==i),] 46 | file <- paste(i,".pdf",sep="") 47 | pdf(file) 48 | for( j in c(1:dim(sub)[1])){ 49 | tit <- sub[j,3] 50 | sample <- sub[j,4:228] 51 | loc <- taxa[names(sample),4:5] 52 | loc$type <- as.numeric(sample[1,]) 53 | loc$value <- 1 54 | wheat_reshape <- cast(loc,Latitude_70+Longitude_70~type) 55 | wheat_reshape2 <- as.data.frame(wheat_reshape) 56 | name <- names(table(loc$type)) 57 | if((j-1)%%4 !=0){ 58 | mapPies(wheat_reshape2,xlim=c(-10,130),ylim=c(10,70),nameX="Longitude_70",nameY="Latitude_70",nameZs=name,symbolSize=1.5, 59 | zColours=brewer.pal(8, "Set3")[c(2,3,4,5)],barOrient='vert',oceanCol="white",landCol=brewer.pal(9,"Pastel1")[9],main="tit") 60 | legend(25,95,box.lty=0,bg="transparent","108 landrace GrowHabbit", col="black") 61 | }else{ 62 | 
      par(mfrow=c(2,2),oma=c(1,1,1,1), mar=c(0,1,0,1), cex=1)
      mapPies(wheat_reshape2,xlim=c(-10,130),ylim=c(10,70),nameX="Longitude_70",nameY="Latitude_70",nameZs=name,symbolSize=1.5,
              zColours=brewer.pal(12, "Set3")[c(2,3,4,5)],barOrient='vert',oceanCol="white",landCol=brewer.pal(9,"Pastel1")[9], main=tit)
      legend(25,95,box.lty=0,bg="transparent",tit, col="black")
    }
  }
  dev.off()
}
--------------------------------------------------------------------------------
/Code/readme.txt:
--------------------------------------------------------------------------------
01:Plot chromosome density map
02:Calculate IBS distance
03:Calculate population genetic diversity
04:Calculate the fixation index (FST)
05:Plot the genetic drift and genetic diversity of each population
06:MDS analysis
07:Genotype PCA
08:Extract geo-environmental climate variables
09:Population structure analysis
10:Xp-clr analysis
11:Xp-clr smooth
12:Xp-clr signal regions
13:Xp-clr with bayenv
14:Xp-clr haplotype
15:Xp-clr negative control
16:RDA analysis
17:Sample location cluster analysis
18:Haplotype map
19:Plot the haplotype geographic distribution map
20:Population structure visualization
21:Climate PCA
22:Candidate gene XP-CLR signal visualization
23:Candidate gene haplotype map
24:Candidate gene haplotype visualization
25:Candidate gene snpEff
26:Environmental GWAS
27:PPD gene analysis
28:Bayenv analysis
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Codes for Vmap1.1 and Vmap3
### Statistics and Graphics drawing

PI.r: Calculate population genetic diversity

FST.r: Calculate the fixation index (FST)

IBS.r: Calculate IBS distance

Daf.r: Calculate derived allele frequency

Maf.r: Calculate minor allele frequency

AdmixtureQ.r: Population structure analysis

ChrBin.r: Draw a chromosome diagram

Cluster.r: Sample location cluster analysis

ExtractMap.r: Extract geo-environmental climate variables

LDjump.r: Linkage disequilibrium analysis

Migration.r: Visualize spatial population structure from geo-referenced genetic samples

Pheatmap.r: Haplotype map

Plot_Density.r: Plot chromosome density map

Plot_Drift_Pi.r: Plot the genetic drift and genetic diversity of each population

Plot_Fst.r: Draw heat maps of the fixation index (FST) between subspecies

Plot_Haplotype.r: Draw the haplotype map after population structure analysis

Plot_IBS.r: Draw heat maps of IBS distance for each population

Plot_MapPie.r: Plot the haplotype geographic distribution map

Plot_Pi.r: Plot the genetic diversity of each population

Enrich.r: Gene function enrichment visualization

Manhattan.r: Draw a Manhattan plot
--------------------------------------------------------------------------------
/R_Code.Rproj:
--------------------------------------------------------------------------------
Version: 1.0

RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX
--------------------------------------------------------------------------------