├── .DS_Store ├── .gitattributes ├── .gitignore ├── 01_Vmap1.1R ├── .DS_Store ├── 01_Xp-clr_Smooth_V1.r ├── 01_Xp-clr_Smooth_V2.r ├── 02_Xp-clr_SelectGene_V1.sh ├── 02_Xp-clr_SelectGene_V2.sh ├── 03_Xp-clr_BayenvOver.sh ├── 03_Xp-clr_Haplotype.sh ├── 03_Xp-clr_NegativeContral.r ├── 03_Xp-clr_to_jiao.r ├── 04_9VIP_Gene_Seq_anno.r ├── 04_9VIP_Xp-clr_Haplotype.sh ├── 04_GeneStructureMa.r ├── 04_GeneStructureMap.r ├── 04_Qmatrix_Visual.r ├── 04_VIP_Gene_XpCLR_signal.r ├── 05_Sample_Cluster.r ├── 06_Pheatmap.r ├── 06_Qmatrix_PieMap.r ├── 07_VCF_Haplotype_Visual.r ├── 08_Fst_density.r ├── 08_Fst_density.sh ├── 08_VIP_Gene_Heatmap.r ├── 09_RDA.r ├── 09_RDA_02(invalid).R ├── 10_Density.r ├── 11_Drift_pi.r ├── 12_FST.r ├── 13_IBS(invalid).r ├── 13_IBS2.r ├── 13_Xp-clr.sh ├── 14_PI.r ├── 15_PCA.r ├── 15_PCA_climate.r ├── 15_VRN.r ├── 16_Barcode.r ├── 17_CreateHmp.r ├── 18_Daf.r ├── 19_Enrich.r ├── 20_ExtractMap.r ├── 21_LDjump.r ├── 22_Maf.r ├── 23_Merge_DoneFile.r ├── 24_MergeTest.r ├── 25_MergeVCF.r ├── 26_Migration.r ├── 27_Permut.r ├── 28_Pluk.r ├── 29_Statistic.r ├── 30_Wenn.r ├── 31_WheatMap.r ├── 32_NeSize.r ├── 33_vf_pipe.sh ├── 34_Vmap3.sh ├── 35_ChrBin.R ├── 35_Storage.sh ├── 36_LDJump.R ├── 37_permut.R ├── 38_Venn.R ├── 39_42-21chr.sh ├── 40_MDS.r ├── 41_Method1.sh ├── 42_MCMC.sh ├── 43_est.sh ├── 44_Bayenv.r ├── 45_Admixture.sh ├── 46_VMap3流程.sh ├── 47_TestGF.r ├── 49_Variants_Age.sh ├── 50_LFMM.r ├── 51_105XD.r ├── 52_37gene_snpEff.r ├── 53_37gene_haplotype.r ├── 55_VMap3_siteQC.r ├── 56_VMap3_taxaQC.r ├── 57_Gene_HaploType.r ├── 57_Gene_HaploType_Map.r ├── 58_PPD.r ├── 59_Depth.R ├── 60_response.R ├── LICENSE ├── Scripts.Rproj ├── assets │ └── 16442473466436.jpg └── 可能有用的smcpp.R ├── 02_Vmap3R ├── .DS_Store ├── 01_Tree_Anno.r ├── 02_Enrich.r ├── 03_Filt_vcf.r ├── 04_IBS.r ├── 05_Map.r ├── 06_Map2.r ├── 07_Nucdiff.sh ├── 08_PCA.r ├── 09_PCA2.r ├── 10_Maf-Dis.r ├── 11_Statistic.sh ├── 12_VcfQc.r ├── 13_VcfStatistic.r └── 14_Snp_Density.sh ├── 03_Analysis ├── .DS_Store ├── 01_ChangeFileFormat_1.r ├── 02_ChangeFileFormat_2.r ├── 03_Basic_Statistic.sh ├── 04_Basic_Statistic_2.r ├── 05_FST_Calculate.r ├── 06_SeperationSite_Calculate.r ├── 07_TajimasD_Calculate.sh ├── 08_PI_Calculate.sh ├── 09_IBS.r ├── 09_IBS_Calculate.sh ├── 10_IceData.r ├── 11_ReadDepth.sh └── 12_BWA.sh ├── 04_GWAS ├── .DS_Store ├── 01_WEGA_gwas.sh ├── 02_Manhattan.r ├── 03_Gene_Domain.r ├── 04_Gene_trait.r ├── 05_Plot_Gene_Trait.r ├── 06_Plot_Rht.r ├── 08_Correlation.r └── 09_envGWAS.r ├── 05_Plot ├── Plot_Density.r ├── Plot_Drift_Pi.r ├── Plot_Finder.r ├── Plot_Fst.r ├── Plot_Haplotype.r ├── Plot_IBS.r ├── Plot_MapPie.r ├── Plot_Pi.r ├── Plot_Pi_02.r └── Plot_Tajima'D.r ├── 06_MarkDown ├── ASMC.md ├── GATK.md ├── MCMC.md ├── Wheat migration routes.md └── bayenv_method.md ├── 07_VMap3 ├── .DS_Store ├── 01_Map.r ├── 02_ASMC.r ├── 03_FastSMC.r ├── 04_BasicStatistics.r ├── 05_TasselGWAS.sh ├── 06_Selscan.r ├── 07_scam.r ├── 08_Raxml-ng.r ├── 09_WheatGo.r ├── 10_SDS.r ├── 11_PiRatio.r ├── 12_Selection.R ├── 13_Baypass.r ├── 14_snpEff.R ├── 15_Rareallele.R ├── 16_Network.R ├── 17_N-content.R ├── 18_Haplotype.R ├── 19_lm.R ├── 20_fst-piR.R ├── 20_smcpp.R ├── 21_sweed.R ├── 22_recombinationRate.R ├── 23_clustersample.R ├── 24_h12.R ├── 25_ehh.R ├── 26_ppi.R ├── 27_xpclr.R ├── 28_envgwas.R ├── 29_correlation.R ├── 30_polygraph.R ├── 31_gradientForest.R ├── 32_coFu.Rmd ├── 33_环境变量降维.r ├── 34_adapgene.R ├── 35_表型处理.R ├── 36_checksample.R ├── 37_traitgwas.R ├── 38_bayesR.R ├── 39_coXiao.R ├── 40_环境适应性位点.R ├── 41_表型环境关联.R ├── 
42_群体结构分析.R ├── 43_RDA.R ├── 44_polygenic_adaptation.R ├── 45_partialRda.R ├── 46_fastcall2测试.R ├── 47-物种分布模型.R ├── 48_lassip.R ├── 49_RAiSD.R ├── 50_riskscore.R ├── 51_LinkNe.R ├── 52_JM44.R ├── 53_coTian.R ├── 54_density.R ├── Lulab_germplasm_Info.xlsx ├── Lulab_germplasm_Storage.xlsx ├── VMap3.info ├── VMap3.info2 ├── VMap3_382landrace_82clim_PC10.txt ├── VMap3_523landrace_climPC.txt ├── VMap3_523landrace_withBioclimate.txt └── vmap3.Rmd ├── BSAseq ├── BSA-seq混合分组鉴定功能基因.md ├── BSAseq.R ├── QTLseq_BSA.R ├── gatk_step1_Index.sh ├── gatk_step2_callvar.sh ├── gatk_step3_gvcfmerg.sh └── gatk_step4_filter_SNP.sh ├── Basic_Path.sh ├── Basic_R.r ├── Basic_Shell.sh ├── Code ├── .DS_Store ├── 01_Density.r ├── 02_IBS.r ├── 03_PI.r ├── 04_FST.r ├── 05_Drift_pi.r ├── 06_MDS.r ├── 07_PCA.r ├── 08_ExtractMap.r ├── 09_Admixture.sh ├── 10_Xp-clr.sh ├── 11_Xp-clr_Smooth.r ├── 12_Xp-clr_SelectGene.sh ├── 13_Xp-clr_BayenvOver.sh ├── 14_Xp-clr_Haplotype.sh ├── 15_Xp-clr_NegativeContral.r ├── 16_RDA.r ├── 17_Sample_Cluster.r ├── 18_Pheatmap.r ├── 19_Qmatrix_PieMap.r ├── 20_Qmatrix_Visual.r ├── 21_PCA_climate.r ├── 22_VIP_Gene_XpCLR_signal.r ├── 23_Gene_HaploType_Map.r ├── 24_Gene_HaploType.r ├── 25_Gene_snpEff.r ├── 26_envGWAS.r ├── 27_PPD.r ├── 28_Bayenv.r └── readme.txt ├── README.md ├── R_Code.Rproj └── 小麦环境适应性 ├── 01-环境变量聚类.R ├── 02-位点统计.R ├── 03-位点单倍型分析.R ├── 04-样本分群及遗传分析.R ├── 05_选择信号分析.R ├── 06-snpAttr.R └── 07_growthHabit.R /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yafei-Guo-coder/DumpNice/183d85aba75932aeacf7484386eb0cae484c7d9b/.DS_Store -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | 8 | # Example code in package build process 9 | *-Ex.R 10 | 11 | # Output files from R CMD build 12 | /*.tar.gz 13 | 14 | # Output files from R CMD check 15 | /*.Rcheck/ 16 | 17 | # RStudio files 18 | .Rproj.user/ 19 | 20 | # produced vignettes 21 | vignettes/*.html 22 | vignettes/*.pdf 23 | 24 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 25 | .httr-oauth 26 | 27 | # knitr and R markdown default cache directories 28 | /*_cache/ 29 | /cache/ 30 | 31 | # Temporary files created by R markdown 32 | *.utf8.md 33 | *.knit.md 34 | 35 | # Shiny token, see https://shiny.rstudio.com/articles/shinyapps.html 36 | rsconnect/ 37 | -------------------------------------------------------------------------------- /01_Vmap1.1R/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yafei-Guo-coder/DumpNice/183d85aba75932aeacf7484386eb0cae484c7d9b/01_Vmap1.1R/.DS_Store -------------------------------------------------------------------------------- /01_Vmap1.1R/03_Xp-clr_Haplotype.sh: -------------------------------------------------------------------------------- 1 | #working directory 2 | #204:yafei:/data2/yafei/003_Project3/Vmap1.1/E6/Xp-CLR_V2 3 | 4 | #extract gene_region VCF from 225 all vcf files 5 | grep -w -f 87Gene_id.txt gene_v1.1_Lulab.gff3 | awk -F";" '{print $1}' |awk '{print 
$1"\t"$4-5000"\t"$5+5000"\t"$9}' | awk -F"\tID=" '{print $2"\t"$1}' > 87gene_5k.txt 6 | #bash getVcf.sh 87gene_5k.txt 7 | cat $1 |while read gene chr from to 8 | do 9 | #echo $chr $from $to 10 | #if echo $2 |grep -q '.*.vcf.gz$';then 11 | # vcftools --gzvcf $2 --chr $chr --from-bp $from --to-bp $to --recode --recode-INFO-all --out $gene.$chr.$from-$to 12 | #elif echo $2 |grep -q '.*.vcf$';then 13 | vcftools --gzvcf /data2/yafei/003_Project3/Vmap1.1/E6/Landrace_locate_225/chr$chr.E6_Landrace_locate.vcf.gz --chr $chr --from-bp $from --to-bp $to --recode --recode-INFO-all --out 5k.$gene.$chr.$from-$to 14 | #fi 15 | done 16 | 17 | #传到本地用Java Migration/Haplotype将vcf转换成单倍型txt格式,使用07_VCF_Haplotype_Visual.r进行可视化。 18 | #本地路径:/Users/guoyafei/Documents/01_个人项目/02_Migration/02_数据表格/01_Vmap1-1/01_Add_ZNdata/05_Environment/XP-CLR/Gene/V2/TXT -------------------------------------------------------------------------------- /01_Vmap1.1R/03_Xp-clr_NegativeContral.r: -------------------------------------------------------------------------------- 1 | #XP-CLR negative contral 2 | library(RColorBrewer) 3 | library(ggplot2) 4 | setwd("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/02_Environment/02_XP-CLR/NegativeContral") 5 | dist <- read.table("dis_matrix.txt",header=T,stringsAsFactors = F) 6 | out <- strsplit(dist[,1], ":") 7 | dist$ID1 <- NA 8 | dist$ID2 <- NA 9 | a<-1 10 | for (i in c(1:length(out))){ 11 | dist[a,6] <- out[[i]][1] 12 | dist[a,7] <- out[[i]][2] 13 | a <- a+1 14 | } 15 | #dim(dist[which(dist$geopolitical_dis>100 & dist$TEMP_dis<10),]) 16 | WA <- read.table("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/02_Environment/02_XP-CLR/NegativeContral/group_V2/WA.txt",header =F,stringsAsFactors = F) 17 | North1 <- read.table("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/02_Environment/02_XP-CLR/NegativeContral/group_V2/North1.txt",header=F,stringsAsFactors = F) 18 | North2 <- read.table("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/02_Environment/02_XP-CLR/NegativeContral/group_V2/North2.txt",header=F,stringsAsFactors = F) 19 | #Temperature 20 | pdf("Temperature_dist.pdf",height = 10,width = 10) 21 | #WA VS North1 22 | dist$Type1 <- NA 23 | dist[which((dist$ID1 %in% WA[,1] & dist$ID2 %in% North1[,1])) ,8] <- "Yes" 24 | dist[which((dist$ID1 %in% North1[,1] & dist$ID2 %in% WA[,1])) ,8] <- "Yes" 25 | p <- ggplot(dist, aes(geopolitical_dis,TEMP_dis, color=Type1)) + 26 | geom_point( size=3,alpha=0.2)+ 27 | theme_classic()+ 28 | ggtitle("WA VS North1")+ 29 | theme(plot.title = element_text(color="red", size=20, face="bold.italic"),legend.text = element_text(size=20),legend.title=element_blank(),axis.text.x = element_text(size = 25), axis.title.x = element_text(size = 25),axis.text.y = element_text(size = 25),axis.title.y = element_text(size = 25)) 30 | print(p) 31 | #WA VS North2 32 | dist$Type1 <- NA 33 | dist[which((dist$ID1 %in% WA[,1] & dist$ID2 %in% North2[,1])) ,8] <- "Yes" 34 | dist[which((dist$ID1 %in% North2[,1] & dist$ID2 %in% WA[,1])) ,8] <- "Yes" 35 | 36 | p <- ggplot(dist, aes(geopolitical_dis,TEMP_dis, color=Type1)) + 37 | geom_point( size=3,alpha=0.2)+ 38 | theme_classic()+ 39 | ggtitle("WA VS North2")+ 40 | theme(plot.title = element_text(color="red", size=20, face="bold.italic"),legend.text = element_text(size=20),legend.title=element_blank(),axis.text.x = element_text(size = 25), axis.title.x = element_text(size = 25),axis.text.y = element_text(size = 25),axis.title.y = element_text(size = 25)) 41 | 42 | print(p) 43 | 44 | 
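# NOTE (added sketch, not part of the original script): the pairwise comparison
# blocks in this file repeat the same flag-and-plot pattern for WA/North1,
# WA/North2 and North1/North2. A minimal helper, assuming grp1/grp2 are data
# frames whose first column holds taxa IDs (as WA, North1, North2 above):
flag_pairs <- function(dist, grp1, grp2) {
  dist$Type1 <- NA
  hit <- (dist$ID1 %in% grp1[,1] & dist$ID2 %in% grp2[,1]) |
         (dist$ID1 %in% grp2[,1] & dist$ID2 %in% grp1[,1])
  dist$Type1[hit] <- "Yes"
  dist
}
# e.g. dist <- flag_pairs(dist, North1, North2) before building the ggplot below.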
#North1 VS North2 45 | dist$Type1 <- NA 46 | dist[which((dist$ID1 %in% North2[,1] & dist$ID2 %in% North1[,1])),8] <- "Yes" 47 | dist[which((dist$ID1 %in% North1[,1] & dist$ID2 %in% North2[,1])),8] <- "Yes" 48 | 49 | p <- ggplot(dist, aes(geopolitical_dis,TEMP_dis, color=Type1)) + 50 | geom_point( size=3,alpha=0.2)+ 51 | theme_classic()+ 52 | ggtitle("North1 VS North2")+ 53 | theme(plot.title = element_text(color="red", size=20, face="bold.italic"),legend.text = element_text(size=20),legend.title=element_blank(),axis.text.x = element_text(size = 25), axis.title.x = element_text(size = 25),axis.text.y = element_text(size = 25),axis.title.y = element_text(size = 25)) 54 | print(p) 55 | dev.off() 56 | -------------------------------------------------------------------------------- /01_Vmap1.1R/04_9VIP_Xp-clr_Haplotype.sh: -------------------------------------------------------------------------------- 1 | #具体分析一些样本的单倍型,总共分成3类 2 | #1. 保护祖先种的意义: 祖先种中存在的变异,在传播的过程中丢失。 3 | #eg(抗病): Sr33(TraesCS1D02G029100), Sr45(TraesCS1D02G040400), Tsn1(TraesCS5B02G059000) 4 | #2. 保护地区特有品种的意义: 祖先种中没有,在传播的过程中适应区域内环境,形成新的变异。 5 | #eg(氮吸收,株高): NGR5_1(TraesCS1A02G242800), GID-A1(TraesCS1A02G255100), Rht-A1(TraesCS4A02G271000), Rht-B1(TraesCS4B02G043100) 6 | #3. 单倍型在区域变化的例子。 7 | #eg(开花,光敏): TaFT8-2_1(TraesCS2A02G485000), TaPHYC_1(TraesCS5A02G391300) 8 | 9 | #working directory 10 | 204:yafei:/data2/yafei/003_Project3/Vmap1.1/E6/Xp-CLR_V2/VIP_genes 11 | #提取祖先的对应位点vcf 12 | #VIP_gene.txt(Old) 13 | TraesCS1D02G029100 14 | TraesCS1D02G040400 15 | TraesCS5B02G059000 16 | TraesCS1A02G242800 17 | TraesCS1A02G255100 18 | TraesCS4A02G271000 19 | TraesCS4B02G043100 20 | TraesCS2A02G485000 21 | TraesCS5A02G391300 22 | #VIP_gene.txt(New) 23 | Ppd-A1 24 | Ppd-1 25 | VRN2-2 26 | 27 | #提取带祖先的样本 28 | grep -w -f VIP_gene.txt 87gene_5k.txt > VIPgene_5k.txt 29 | bash getVcf.sh VIPgene_5k.txt > VIPgene_5k.log 30 | #提取在六倍体里面分离的位点 31 | 32 | for i in `cat VIP_gene.txt`; do sed '/#/d' *$i*.vcf | awk '{print $1"\t"$2}'; done > VIP_gene.pos 33 | 34 | for i in `ls *vcf`; do vcftools --vcf $i --positions VIP_gene.pos --recode --out ${i::-11}.pos; done 35 | #1.传到本地用Java Migration/Haplotype将vcf转换成单倍型txt格式,使用07_VCF_Haplotype_Visual.r进行可视化。 36 | #本地路径:/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Gene/VIP_gene/ 37 | #2.使用04_9VIP_Gene_Seq_anno.r进行变异注释。 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /01_Vmap1.1R/04_GeneStructureMa.r: -------------------------------------------------------------------------------- 1 | #Ppd-1和VRN基因随纬度变化的structure分布 2 | #Working diirectory: 203:yafei:/data1/home/yafei/008_Software/snpEff/Xp-clr_6VIP 3 | library(pheatmap) 4 | require(reshape) 5 | require (rworldmap) 6 | require(rworldxtra) 7 | library(pophelper) 8 | library(ggplot2) 9 | require(gridExtra) 10 | setwd("/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Gene/V5/Structure") 11 | #load Qmatrix files 12 | #Ppd-1 13 | sfiles <- list.files(path="/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Gene/V5/Structure/Ppd-1", full.names=T) 14 | slist <- readQ(files=sfiles) 15 | tabulateQ(qlist=readQ(sfiles)) 16 | summariseQ(tabulateQ(qlist=readQ(sfiles))) 17 | #VRN2-2 18 | sfiles <- list.files(path="/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Gene/V5/Structure/VRN2-2", full.names=T) 19 | slist <- readQ(files=sfiles) 20 | tabulateQ(qlist=readQ(sfiles)) 21 | summariseQ(tabulateQ(qlist=readQ(sfiles))) 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 
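# NOTE (added sketch, not part of the original script): the Q matrices above are
# only loaded and summarised here. A minimal barplot sketch reusing the plotQ()
# style of 04_Qmatrix_Visual.r; the output file name is assumed, and clustercol
# must supply at least K colours (K <= 8 assumed):
library(RColorBrewer)
pdf("Qmatrix_structure.pdf", width = 24, height = 8)
p1 <- plotQ(slist, returnplot = TRUE, exportplot = FALSE, quiet = TRUE, basesize = 11,
            sortind = "all", showindlab = FALSE, sharedindlab = FALSE,
            clustercol = brewer.pal(8, "Set2"))
grid.arrange(grobs = p1$plot, nrow = length(p1$plot))
dev.off()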
-------------------------------------------------------------------------------- /01_Vmap1.1R/04_Qmatrix_Visual.r: -------------------------------------------------------------------------------- 1 | #统一设置:注释内容及颜色 2 | library(pheatmap) 3 | require(reshape) 4 | require (rworldmap) 5 | require(rworldxtra) 6 | library(pophelper) 7 | library(ggplot2) 8 | require(gridExtra) 9 | setwd("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/01_BasicStatistic/06_Structure/V1/Select") 10 | #load Qmatrix files 11 | sfiles <- list.files(path="/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/01_BasicStatistic/06_Structure/V1/Select/QMatrix", full.names=T) 12 | slist <- readQ(files=sfiles) 13 | tabulateQ(qlist=readQ(sfiles)) 14 | summariseQ(tabulateQ(qlist=readQ(sfiles))) 15 | 16 | pdf("Q.pdf",width = 24,height = 8) 17 | p1 <- plotQ(slist[1:7],returnplot=T,exportplot=F,quiet=T,basesize=11, 18 | sortind="all",showindlab=F,showyaxis=T,showticks=T,sharedindlab=T,clustercol=brewer.pal(7, "Set2")) 19 | grid.arrange(p1$plot[[1]],p1$plot[[2]],p1$plot[[3]],p1$plot[[4]],p1$plot[[5]],nrow=5) 20 | dev.off() 21 | 22 | library(readxl) 23 | library(ggmap) 24 | library(RColorBrewer) 25 | setwd("/Users/guoyafei/Documents/02_VmapIII/06_Selection/Fst/") 26 | group <- read.table("group.txt",header=F,stringsAsFactors = F) 27 | a <- read_excel("/Users/guoyafei/RstudioProjects/GitHub/R_Code/07_VMap3/Lulab_germplasm_Info.xlsx", sheet=1, na='NA') 28 | b<-a[which(a$`Taxa(vmap3)` %in% group$V3),c(1,11,12)] 29 | rownames(group)<-group$V3 30 | c <- cbind(b,group[b$`Taxa(vmap3)`,]) 31 | 32 | mp <- NULL 33 | mapworld <- borders("world",colour = "gray70",fill="white") 34 | mp <- ggplot() + 35 | mapworld + 36 | #xlim(0,90) + 37 | #ylim(10,70) + 38 | theme_classic() 39 | color <- brewer.pal(8, "Dark2")[c(1,2,3,4,5,6,7)] 40 | color <- brewer.pal(8, "Dark2")[1:7] 41 | data <- c[which(c$V1=="domemmer"),] 42 | data <- c[which(c$V1=="freethresh"),] 43 | data <- c[which(c$V1=="wildemmer"),] 44 | #AABBDD 45 | sub <- data[which(data$Region %in% c("EU","WA","EA2","SH","IA")),] 46 | mp+geom_point(aes(x=sub$Longitude, y=sub$Latitude, color=sub$Region,alpha=0.3),size=1)+ 47 | scale_size(range=c(1,1)) + 48 | scale_color_manual(values = color) + 49 | #theme_classic() + 50 | #theme(axis.text.x = element_text(size = 20),axis.title.x = element_text(size = 20),axis.text.y = element_text(size = 20),axis.title.y = element_text(size = 20))+ 51 | #scale_fill_manual(values=c("#97FFFF", "#FFD700", "#FF6347", "#8470FF","#D8BFD8"), 52 | # breaks=c("AA", "SS", "DD", "AABB","AABBDD"), 53 | # labels=c("AA", "SS", "DD", "AABB","AABBDD"))+ 54 | theme(axis.text = element_blank(), 55 | axis.ticks = element_blank(), 56 | axis.title = element_blank(), 57 | panel.border = element_blank()) 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /01_Vmap1.1R/04_VIP_Gene_XpCLR_signal.r: -------------------------------------------------------------------------------- 1 | library(qqman) 2 | #画信号位点基因的XP-CLR的染色体信号 3 | #Working directory 4 | #xuebo@204:/data2/xuebo/Projects/Speciation/xpclr/Selection_V3/smooth/lineage_V2/Top5%/VIP_gene_XpCLR 5 | #VIP_gene 6 | #1. TraesCS4D02G364400 Vrn2-2 24 58277633 58279728 chr4D:509282253-509284348(-) Transcription factor GHD7 [UniProtKB/Swiss-Prot:E5RQA1] Os10g0560400 NA 7 | #2. TraesCS2D02G079600 Ppd-1(PRR) 11 33952048 33956269 chr2D:33952048-33956269(-) Two-component response regulator-like PRR37 [UniProtKB/Swiss-Prot:A2YQ93] Os07g0695100 NA 8 | #3. 
TraesCS2A02G081900 Ppd-A1 7 36933684 36938202 chr2A:36933684-36938202(-) Two-component response regulator-like PRR37 [UniProtKB/Swiss-Prot:A2YQ93] Os07g0695100 NA 9 | #4. TraesCS5A02G473800 Q-5A 26 196896739 196900381 chr5A:650127258-650130900(-) APETALA2-like protein 2 [UniProtKB/Swiss-Prot:Q84TB5] Os03g0818800 NA 10 | #5. TraesCS4B02G043100 Rht-B1 21 30861268 30863723 chr4B:30861268-30863723(+) DELLA protein RHT-1 [UniProtKB/Swiss-Prot:Q9ST59] Os03g0707600 AT1G66350 11 | #6. TraesCS1D02G029100 Sr33 5 11451423 11459353 chr1D:11451423-11459353(+) Disease resistance protein RGA5 [UniProtKB/Swiss-Prot:F7J0N2] NA AT3G46530 12 | #7. TraesCS1D02G040400 Sr45 5 19341296 19346065 chr1D:19341296-19346065(+) Putative disease resistance protein RGA4 [UniProtKB/Swiss-Prot:Q7XA39] Os10g0130800 N 13 | 14 | #File Gene_Name Chr Start Stop 15 | #North2_South_smooth_A Ppd-A1 7 36933684 36938202 16 | #North2_South_smooth_B Rht-B1 21 30861268 30863723 17 | #North2_South_smooth_D Sr45 5 19341296 19346065 18 | #WA_EU_smooth_A Q-5A 26 196896739 196900381 19 | #WA_EU_smooth_D Sr45 5 19341296 19346065 20 | #EU_South_smooth_B Rht-B1 21 30861268 30863723 21 | #EU_South_smooth_D Flowering1 24 58277633 58279728 22 | #EU_South_smooth_D Sr45 5 19341296 19346065 23 | #WA_South_smooth_D Flowering1 24 58277633 58279728 24 | #WA_South_smooth_D Sr33 5 11451423 11459353 25 | #Tibet_South_smooth_D Ppd-1(PRR) 11 33952048 33956269 26 | 27 | cat $1 |while read file chr1 chr2 28 | do 29 | awk '{if($1== "'${chr1}'" || $1=="'${chr2}'") {print $0}}' ../../${file}| sort -k1,1n -k2,2g |sed '1i Chr\tWindowStart\tWindowStop\tSNPcount\tMeanY\tWstat' > ${file::-4}.${chr1}.${chr2}.txt 30 | done 31 | 32 | #把42条染色体合并成21条 33 | #转移到本地画图 34 | #/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Xpclr/V3 35 | setwd("/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Xpclr/V3") 36 | #批量读取Xp-clr结果文件 37 | path <- "/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Xpclr/V3/TXT" ##文件目录 38 | fileNames <- dir(path) ##获取该路径下的文件名 39 | filePath <- sapply(fileNames, function(x){ 40 | paste(path,x,sep='/')}) ##生成读取文件路径 41 | data <- lapply(filePath, function(x){ 42 | read.table(x, header=T,stringsAsFactors = F)}) 43 | 44 | #读取阈值文件 45 | thresHold <- read.table("thresHold.txt",header=T,stringsAsFactors = F) 46 | rownames(thresHold) <- thresHold$GEO_region 47 | #读取着丝粒文件 48 | centromer <- read.table("centromerer.txt",header=T,stringsAsFactors = F) 49 | rownames(centromer) <- centromer$chr 50 | 51 | #读取VIP基因文件 52 | gene <- read.table("VIP_gene.txt",header=T,stringsAsFactors = F) 53 | rownames(gene) <- paste(gene$File,gene$Position,sep=".") 54 | out1 <- strsplit(sub('.chr',':chr', names(data)), ":") 55 | out2 <- strsplit(sub('.txt',':txt', names(data)), ":") 56 | 57 | pdf("Xp-clr.pdf",width = 10,height = 5) 58 | for(i in c(1:length(data))){ 59 | name <- out1[[i]][1] 60 | tit <- out2[[i]][1] 61 | chr <- strsplit(out1[[i]][2], ".txt")[[1]][1] 62 | title <- paste(gene[tit,1],gene[tit,2],sep=":") 63 | cen <- centromer[chr,4] 64 | plot(data[[i]]$WindowStart/1000000, data[[i]]$MeanY, ylim = c(-2,50),axes = T,col = "skyblue",type="l",cex = 2,lwd = 3,ann = F) 65 | abline(v = gene[tit,7]/1000000, col = "red", cex=2,lwd = 1) 66 | abline(h=thresHold[name,3], col = "red", lty = 3,cex=2,lwd = 2) 67 | title(title,xlab= chr, ylab = 'Xp-clr', font.lab = 1, cex.lab = 1.5) 68 | points(cen/1000000,0, pch=20,cex=2,col="grey") 69 | } 70 | dev.off() 71 | -------------------------------------------------------------------------------- 
/01_Vmap1.1R/05_Sample_Cluster.r: -------------------------------------------------------------------------------- 1 | library(cluster) 2 | library(factoextra) 3 | #聚类 4 | #根据经纬度给样本聚类 5 | #data <- read.table("land.txt", header=T, sep="\t", stringsAsFactors = F) 6 | #dataA <- data[,c(3,4)] 7 | #data2 <- dataA[!is.na(dataA$Latitude),] 8 | 9 | data <- read.table("/Users/guoyafei/Documents/01_Migration/02_Environment/04_bayenv/V1/225env.txt", header=F,stringsAsFactors = F) 10 | colname <- c("elevation","temp1","temp2","temp3","temp4","temp5","temp6","temp7","temp8","temp9","temp10","temp11","prec1","prec2","prec3","prec4","prec5","prec6","prec7","prec8","Latitude","Logititude") 11 | rownames(data) <- data[,1] 12 | data2 <- data[!is.na(data$V6),-1] 13 | colnames(data2) <- colname 14 | df = scale(data2,center = T,scale = T) 15 | colnames(df) <- colname 16 | #按列进行标准化 17 | #先求样本之间两两相似性 18 | result <- dist(df, method = "euclidean") 19 | #使用指定距离来计算数据矩阵行之间的距离 20 | #euclidean:欧几里得距离 21 | #maximum:最大距离 22 | #manhattan:绝对距离 23 | #canberra:堪培拉距离 24 | #minkowski:闵可夫斯基距离 25 | #产生层次结构 26 | result_hc <- hclust(d = result, method = "ward.D2") 27 | #Ward: 最小方差方法旨在寻找紧凑的球形簇的完整的联动方法找到相似集群。 28 | #有两种不同的算法,"ward.D"(相当于只沃德选择"ward"不执行沃德(1963)聚类准则)& "ward.D2"实现了标准(Murtagh的及Legendre 2014)在集群更新之前对差异进行平方。注意agnes(*, method="ward")对应于hclust(*, "ward.D2"). 29 | #median和centroid在不导致单调的距离测量,或者等效产生的树状图可以具有所谓的倒置或颠倒。 30 | 31 | data2$type <- cutree(result_hc, k=30) 32 | lat_mean <- tapply(data2[,21],data2$type,mean,na.rm = TRUE) 33 | lon_mean <- tapply(data2[,22],data2$type,mean,na.rm = TRUE) 34 | data2$cluster1 <- NA 35 | data2$cluster2 <- NA 36 | for(i in 1:224) { 37 | for(j in 1:30){ 38 | if(data2[i,23] == j ){ 39 | data2[i,24] <- as.numeric(lat_mean[j]) 40 | data2[i,25] <- as.numeric(lon_mean[j]) 41 | } 42 | } 43 | } 44 | write.table(data2,"30_cluster.txt",sep="\t",row.names = F,quote=F) 45 | 46 | library(maps) 47 | mp<-NULL 48 | mapworld<-borders("world",colour = "gray70",fill="gray70") 49 | mp<-ggplot()+mapworld+ylim(-90,90) 50 | mp_40<-mp+geom_point(aes(x=re$Longitude_40, y=re$Latitude_40))+ 51 | scale_size(range=c(1,1))+ 52 | theme_classic() -------------------------------------------------------------------------------- /01_Vmap1.1R/06_Qmatrix_PieMap.r: -------------------------------------------------------------------------------- 1 | #统一设置:注释内容及颜色 2 | library(pheatmap) 3 | require(reshape) 4 | require (rworldmap) 5 | require(rworldxtra) 6 | library(RColorBrewer) 7 | setwd("/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Gene/V5") 8 | annotation_col <- read.table("/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Taxa_Region_225.txt",header=T,stringsAsFactors = F) 9 | rownames(annotation_col) = c(1:325) 10 | seq <- annotation_col[,1] 11 | #cluster samples: 按照算Xp-clr的区域划分 12 | out <- annotation_col[,c(4,5)] 13 | lat_mean <- tapply(out[,1],out$Region_sub2,mean,na.rm = TRUE) 14 | lon_mean <- tapply(out[,2],out$Region_sub2,mean,na.rm = TRUE) 15 | out$cluster1 <- NA 16 | out$cluster2 <- NA 17 | for(i in 1:225) { 18 | for(j in names(lat_mean)){ 19 | if(out[i,3] == j ){clas 20 | out[i,4] <- as.numeric(lat_mean[j]) 21 | out[i,5] <- as.numeric(lon_mean[j]) 22 | } 23 | } 24 | } 25 | mode <- as.data.frame(cbind(annotation_col[,2],as.numeric(out[,4]),as.numeric(out[,5])),stringsAsFactors = F) 26 | mode$V2 <- as.numeric(mode$V2) 27 | mode$V3 <- as.numeric(mode$V3) 28 | #map 29 | path <- "/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Gene/V5/TXT" ##文件目录 30 | fileNames <- dir(path) 
##获取该路径下的文件名 31 | filePath <- sapply(fileNames, function(x){ 32 | paste(path,x,sep='/')}) ##生成读取文件路径 33 | data <- lapply(filePath, function(x){ 34 | read.table(x, header=F,stringsAsFactors = F)}) 35 | pdf("cut_8_piemap.pdf") 36 | for (i in c(1:82)){ 37 | all <- as.matrix(data[[i]]) 38 | colnames(all) <- c(1:361) 39 | all <- all[,seq] 40 | data.e <- dist(t(all)) 41 | model <- hclust(data.e,method = "complete") 42 | #plot(model) 43 | result <- as.numeric(cutree(model, k=8)) 44 | out2 <- as.data.frame(cbind(mode,result)) 45 | out2$value <- 1 46 | colnames(out2)[1:4] <- c("Latitude", "Logititude", "Sub.population.2", "value") 47 | wheat2 = out2[!is.na(out2$Latitude),] 48 | wheat = wheat2[!is.na(wheat2$Sub.population.2),] 49 | wheat_reshape <- cast(test,Latitude+Logititude~Sub.population.2) 50 | wheat_reshape2 <- as.data.frame(wheat_reshape) 51 | mapPies(wheat_reshape2,xlim=c(-120,140),ylim=c(0,40),nameX="Logititude",nameY="Latitude",nameZs=c("1","2","3"),symbolSize=1, 52 | zColours=brewer.pal(8, "Set2"),barOrient='vert',oceanCol="#D1EEEE",landCol="#FFDAB9",main=i) 53 | } 54 | dev.off() 55 | 56 | -------------------------------------------------------------------------------- /01_Vmap1.1R/10_Density.r: -------------------------------------------------------------------------------- 1 | #install.packages('RIdeogram') 2 | require(RIdeogram) 3 | 4 | setwd("/Users/guoyafei/Documents/01_Migration/01_BasicStatistic/13_Plots/01_Density") 5 | #gene_density <- read.table("ArinaLrFor_LTR_1.txt", header=T,stringsAsFactors = F) 6 | gene_density2 <- read.table("gene_density.txt", header=T, stringsAsFactors = F) 7 | gene_density <- read.table("21-1M_VMap3_SnpDensity.txt", header=T, stringsAsFactors = F) 8 | wheat_karyotype <- read.table("wheat_karyotype.txt", header=T, stringsAsFactors = F) 9 | ideogram(karyotype = wheat_karyotype, overlaid = gene_density2) 10 | convertSVG("chromosome.svg", device = "pdf") 11 | 12 | #wheat_karyotype 13 | #Chr Start End CE_start CE_end 14 | #1 0 248956422 122026459 124932724 15 | #2 0 242193529 92188145 94090557 16 | 17 | #gene_density 18 | #Chr Start End Value 19 | #1 1 1000000 65 20 | #1 1000001 2000000 76 21 | 22 | 23 | 24 | library(CMplot) 25 | setwd("/Users/guoyafei/Documents/01_个人项目/02_VmapIII/03_Fastcall2/测试数据") 26 | mydata<-read.table("/Users/guoyafei/Documents/01_个人项目/02_VmapIII/03_Fastcall2/测试数据/fastcall2_001_pos.txt",header=TRUE,sep="\t") 27 | head(mydata) 28 | # snp chr pos 29 | # snp1_1 1 2041 30 | # snp1_2 1 2062 31 | # snp1_3 1 2190 32 | CMplot(mydata,plot.type="d",bin.size=1e4,col=c("darkgreen","yellow", "red"),file="jpg",memo="snp_density",dpi=300) 33 | mydata<-read.table("/Users/guoyafei/Documents/01_个人项目/02_VmapIII/03_Fastcall2/测试数据/fastcall_001_pos.txt",header=TRUE,sep="\t") 34 | head(mydata) 35 | # snp chr pos 36 | # snp1_1 1 2041 37 | # snp1_2 1 2062 38 | # snp1_3 1 2190 39 | CMplot(mydata,plot.type="d",bin.size=1e4,col=c("darkgreen","yellow", "red"),file="jpg",memo="snp_density",dpi=300) 40 | 41 | 42 | #lineage density 43 | A <- read.table("Alineage.txt",header=F,stringsAsFactors = F) 44 | ggplot(A, mapping = aes(x = V4)) + 45 | geom_density( alpha = 0.5, color = "#999999",fill="#999999") + 46 | #geom_density(fill = "blue",alpha = 0.3,color="blue")+ 47 | theme_bw() 48 | 49 | 50 | B <- read.table("Blineage.txt",header=F,stringsAsFactors = F) 51 | ggplot(B, mapping = aes(x = V4)) + 52 | geom_density( alpha = 0.5, color = "#377EB8",fill="#377EB8") + 53 | #geom_density(fill = "blue",alpha = 0.3,color="blue")+ 54 | theme_bw() 55 | 56 | D <- 
read.table("Dlineage.txt",header=F,stringsAsFactors = F) 57 | ggplot(D, mapping = aes(x = V4)) + 58 | geom_density( alpha = 0.5, color = "#FF7F00",fill="#FF7F00") + 59 | #geom_density(fill = "blue",alpha = 0.3,color="blue")+ 60 | theme_bw() 61 | 62 | ggplot(D, mapping = aes(x = V4),color = "blue",fill="blue") + 63 | geom_line(stat = "density") 64 | 65 | 66 | 67 | -------------------------------------------------------------------------------- /01_Vmap1.1R/13_Xp-clr.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | sleep 4800s 3 | for i in {1..42} 4 | do 5 | WGS --model vcf --type toXPCLR --group1 ../groupTaxa/EA.txt --rec ../../slidewindow_recomrate_updown_20M.txt --chr $i --file /data2/xuebo/Projects/Speciation/E6/chr${i}.Land.vcf.gz --out groupEAChr${i} & 6 | done 7 | 8 | #!/bin/bash 9 | for i in {1..42} 10 | do 11 | XPCLR -xpclr ../groupEA/groupEAChr${i}.geno ../groupWA/groupWAChr${i}.geno ../groupWA/groupWAChr${i}.snp EA_WA_10kchr${i} -w1 0.005 500 10000 $i -p1 0.95 & 12 | done 13 | -------------------------------------------------------------------------------- /01_Vmap1.1R/15_PCA.r: -------------------------------------------------------------------------------- 1 | setwd("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/02_Environment/02_XP-CLR/NegativeContral") 2 | library("FactoMineR") 3 | library("ggplot2") 4 | library("factoextra") 5 | Elevation <- read.csv("Elevation.txt",fill=TRUE, row.names = 1,na.strings = "",header = T, sep="\t", stringsAsFactors = F) 6 | Temp <- read.table("Temp.txt", header=T,row.names = 1,stringsAsFactors = F) 7 | Prec <- read.table("Prec.txt",header=T,row.names = 1,stringsAsFactors = F) 8 | bio <- read.table("20bio.txt",header=T,stringsAsFactors = F) 9 | dt <- as.matrix(scale(bio[!duplicated(bio, fromLast=TRUE),])) 10 | rm1<-cor(dt) 11 | rm1 12 | rs1<- eigen(rm1) 13 | #提取结果中的特征值,即各主成分的方差; 14 | val <- rs1$values 15 | 16 | #换算成标准差(Standard deviation); 17 | (Standard_deviation <- sqrt(val)) 18 | #计算方差贡献率和累积贡献率; 19 | (Proportion_of_Variance <- val/sum(val)) 20 | (Cumulative_Proportion <- cumsum(Proportion_of_Variance)) 21 | #碎石图绘制; 22 | par(mar=c(6,6,2,2)) 23 | plot(rs1$values,type="b", 24 | cex=2, 25 | cex.lab=2, 26 | cex.axis=2, 27 | lty=2, 28 | lwd=2, 29 | xlab = "PC", 30 | ylab="Eigenvalue (Principal Component Variance)") 31 | #提取结果中的特征向量(也称为Loadings,载荷矩阵); 32 | (U<-as.matrix(rs1$vectors)) 33 | #进行矩阵乘法,获得PC score; 34 | PC <- dt %*% U 35 | colnames(PC) <- paste("PC",1:20,sep="") 36 | head(PC) 37 | 38 | #将iris数据集的第5列数据合并进来; 39 | #df<-data.frame(PC, dt_all$ploidy, dt_all$Region) 40 | #head(df) 41 | 42 | library(ggplot2) 43 | #提取主成分的方差贡献率,生成坐标轴标题; 44 | xlab<-paste0("PC1(",round(Proportion_of_Variance[1]*100,2),"%)") 45 | ylab<-paste0("PC2(",round(Proportion_of_Variance[2]*100,2),"%)") 46 | #绘制散点图并添加置信椭圆; 47 | p1<-ggplot(data = df,aes(x=PC1,y=PC2,color=df$dt_all.ploidy))+ 48 | stat_ellipse(aes(fill=df$dt_all.ploidy), 49 | type ="norm", geom ="polygon",alpha=0.2,color=NA)+ 50 | geom_point()+labs(x=xlab,y=ylab,color="")+ 51 | guides(fill=F)+ 52 | theme_classic() 53 | p2<-ggplot(data = PC,aes(x=PC1,y=PC2))+ 54 | geom_point()+labs(x=xlab,y=ylab,color="")+ 55 | guides(fill=F)+ 56 | theme_classic() 57 | p1 58 | p2 59 | 60 | #------ 61 | dt_all$lat_bin <- NA 62 | dt_all[which(dt_all$lat >= -45 & dt_all$lat < -35),24] <- "[-45,-35)" 63 | dt_all[which(dt_all$lat >= -35 & dt_all$lat < -25),24] <- "[-35,-25)" 64 | dt_all[which(dt_all$lat >= -25 & dt_all$lat < -15),24] <- "[-25,-15)" 65 | 
dt_all[which(dt_all$lat >= -15 & dt_all$lat < -5),24] <- "[-15,-5)" 66 | dt_all[which(dt_all$lat >= -5 & dt_all$lat < 5),24] <- "[-5,5)" 67 | dt_all[which(dt_all$lat >= 5 & dt_all$lat < 15),24] <- "[5,15)" 68 | dt_all[which(dt_all$lat >= 15 & dt_all$lat < 25),24] <- "[15,25)" 69 | dt_all[which(dt_all$lat >= 25 & dt_all$lat < 35),24] <- "[25,35)" 70 | dt_all[which(dt_all$lat >= 35 & dt_all$lat < 45),24] <- "[35,45)" 71 | dt_all[which(dt_all$lat >= 45 & dt_all$lat < 55),24] <- "[45,55)" 72 | dt_all[which(dt_all$lat >= 55 & dt_all$lat < 65),24] <- "[55,65)" 73 | dt_all[which(dt_all$lat >= 65 & dt_all$lat < 75),24] <- "[65,75)" 74 | df$lat_bin <- dt_all$lat_bin 75 | p2<-ggplot(data = df,aes(x=PC1,y=PC2,color=df$lat_bin))+ 76 | stat_ellipse(aes(fill=df$dt_all.Region), 77 | type ="norm", geom ="polygon",alpha=0.2,color=NA)+ 78 | geom_point()+labs(x=xlab,y=ylab,color="")+ 79 | guides(fill=F)+ 80 | theme_classic() 81 | p1 82 | 83 | -------------------------------------------------------------------------------- /01_Vmap1.1R/15_PCA_climate.r: -------------------------------------------------------------------------------- 1 | #working directory 2 | #/Users/guoyafei/Documents/01_个人项目/02_Migration/02_数据表格/01_Vmap1-1/01_Add_ZNdata/05_Environment/XP-CLR/NegativeContral 3 | library(RColorBrewer) 4 | library(ggmap) 5 | setwd("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/02_Environment/02_XP-CLR/NegativeContral") 6 | location <- read.table("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/01_BasicStatistic/01_IBS/04_795taxaIBS/795_Location.txt",header=T,stringsAsFactors = F) 7 | 8 | Elevation <- read.table("Elevation.txt",header=T,stringsAsFactors = F) 9 | ele <- merge(Elevation,location,by="ID") 10 | colnames(ele)[2] <-"PC1" 11 | 12 | Temp <- read.table("temp_PC.txt",header=T,stringsAsFactors = F) 13 | Temp$ID <- rownames(Temp) 14 | tem <- merge(Temp,location,by="ID") 15 | 16 | Prec <- read.table("Prec_PC.txt",header=T,stringsAsFactors = F) 17 | Prec$ID <- rownames(Prec) 18 | pre <- merge(Prec,location,by="ID") 19 | 20 | bio <- read.table("20bio_PC.txt",header=T,row.names = 1) 21 | #bio$ID <- rownames(bio) 22 | #bio <- merge(bio,location,by="ID") 23 | 24 | data <- list(ele,tem,pre) 25 | pdf("20bio_PC1.pdf",width = 12,height = 7.5) 26 | for(i in c(1:2)){ 27 | mp <- NULL 28 | mapworld <- borders("world",colour = "grey90",fill="white") 29 | mp <- ggplot()+mapworld+ylim(-60,90)+xlim(-25,200) 30 | mp2 <- mp+geom_point(aes(x=bio$Logititude,y=bio$Latitude,color=bio$PC1),size=2.6)+scale_size(range=c(1,1))+ 31 | scale_colour_gradientn(colours = brewer.pal(11, "RdYlBu"))+ 32 | theme(legend.title=element_blank(),axis.text.x = element_text(size = 25), axis.title.x = element_text(size = 25),axis.text.y = element_text(size = 25),axis.title.y = element_text(size = 25))+ 33 | theme(panel.grid = element_blank()) + 34 | #theme(panel.grid =element_blank()) + ## 删去网格线 35 | #theme(axis.text = element_blank()) + ## 删去刻度标签 36 | #theme(axis.ticks = element_blank()) + ## 删去刻度线 37 | theme(panel.border = element_blank()) 38 | print(mp2) 39 | } 40 | dev.off() 41 | 42 | 43 | #看筛选样本的分布位点 44 | #taxa <- read.table("group_V2/max_temp.txt",header=F,stringsAsFactors = F) 45 | #colnames(taxa)[1] <-"ID" 46 | #max <- merge(taxa,location,by="ID") 47 | 48 | -------------------------------------------------------------------------------- /01_Vmap1.1R/16_Barcode.r: -------------------------------------------------------------------------------- 1 | setwd("/Users/guoyafei/Desktop/") 2 | data <- 
read.table("Barcode",header=F,stringsAsFactors = F) 3 | data <- as.data.frame.matrix(data) 4 | mylist <- vector( mode = "list" ) 5 | for(i in 1:108){ 6 | if(length(strsplit(data[i,1],"")[[1]])==4){ 7 | for(j in 1:2){ 8 | print(j) 9 | print(substr(data[i,1], j, j+2)) 10 | }}else if(length(strsplit(data[i,1],"")[[1]])==5){ 11 | for(j in 1:2){ 12 | print(j) 13 | print(substr(data[i,1], j, j+3)) 14 | } 15 | } else if(length(strsplit(data[i,1],"")[[1]])==6){ 16 | for(j in 1:2){ 17 | print(j) 18 | print(substr(data[i,1], j, j+4)) 19 | }} else { 20 | for(j in 1:2){ 21 | print(j) 22 | print(substr(data[i,1], j, j+5)) 23 | } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /01_Vmap1.1R/17_CreateHmp.r: -------------------------------------------------------------------------------- 1 | rs# alleles chrom pos strand assembly# center protLSID assayLSID panelLSID QCcode 2 | data <- read.table("output.txt",header=T,stringsAsFactors = F) 3 | pos <- read.table("WorldFilterd.pos",header=T,stringsAsFactors = F) 4 | colnames(data)[4] <- "rs#" 5 | colnames(data)[5] <- "strand" 6 | data[,5] <- NA 7 | colnames(data)[6] <- "chrom" 8 | colnames(data)[7] <- "pos" 9 | colnames(data)[1] <- "assembly#" 10 | colnames(data)[2] <- "center" 11 | colnames(data)[3] <- "protLSID" 12 | colnames(data)[8] <- "assayLSID" 13 | 14 | alleles <- paste(pos[,2],pos[,3],sep="/") 15 | data$alleles <- alleles 16 | data$panelLSID <- NA 17 | data$QCcode <- NA 18 | data[,1] <- NA 19 | data[,2] <- NA 20 | data[,3] <- NA 21 | data[,8] <- NA 22 | all <- data[,c(4,755,6,7,5,1,2,3,8,756,757,9:754)] 23 | all2 <- all[which(all$chrom != "NA"),] 24 | write.table(all2,"World.hmp.txt",quote=F,row.names = F,sep="\t") 25 | -------------------------------------------------------------------------------- /01_Vmap1.1R/18_Daf.r: -------------------------------------------------------------------------------- 1 | setwd("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/01_BasicStatistic/07_Daf") 2 | A <- read.table("A_daf",header=F,stringsAsFactors = F) 3 | B <- read.table("B_daf",header=F,stringsAsFactors = F) 4 | D <- read.table("D_daf",header=F,stringsAsFactors = F) 5 | library("ggplot2") 6 | ggplot(all, aes(x=V1,fill=lineage)) + 7 | facet_grid(lineage ~ .)+ 8 | geom_histogram(aes(y=..density..),binwidth=0.02, alpha=.7)+ 9 | theme_classic()+ 10 | theme(axis.text.x=element_text(size=200), 11 | axis.text.y=element_text(size=300,face="plain"), 12 | axis.title.y=element_text(size = 300,face="plain")) 13 | -------------------------------------------------------------------------------- /01_Vmap1.1R/21_LDjump.r: -------------------------------------------------------------------------------- 1 | require(LDJump) 2 | results = LDJump("/data1/home/yafei/LDJump-test/HatLandscapeN16Len1000000Nrhs15_th0.01_540_1.fa", alpha = 0.05, segLength = 1000, pathLDhat = "/data1/home/yafei/Software/LDhat-master/", pathPhi = "/data1/home/yafei/Software/PhiPack/Phi", format = "fasta", refName = NULL) 3 | postscript("Results.eps", horiz = F) 4 | plot(results[[1]], xlab = "Segments", ylab = "Estimated Recombination Rate", main = "Estimated recombination map with LDJump") 5 | dev.off() -------------------------------------------------------------------------------- /01_Vmap1.1R/22_Maf.r: -------------------------------------------------------------------------------- 1 | #MAF-frequence 2 | setwd("//Users/guoyafei/Downloads/Results/Impute/") 3 | input <- read.table("freq_stat_input.frq", header=TRUE,stringsAsFactors = F) 4 
| output <- read.table("freq_stat.frq",header=TRUE,stringsAsFactors = F) 5 | xlab <- c("0.05-0.1","0.10-0.15","0.15-0.20","0.20-0.25","0.25-0.30","0.30-0.35","0.35-0.40","0.40-0.45","0.45-0.50") 6 | a <- c(40081,26663,39421,45852,45121,34408,21774,8301,4469) 7 | b <- a/266090 8 | 9 | > length(which(output[,5] >= 0.05 &output[,5] < 0.1)) 10 | [1] 40081 11 | > length(which(output[,5] >= 0.1 &output[,5] < 0.15)) 12 | [1] 26663 13 | > length(which(output[,5] >= 0.15 &output[,5] < 0.2)) 14 | [1] 39421 15 | > length(which(output[,5] >= 0.2 &output[,5] < 0.25)) 16 | [1] 45852 17 | > length(which(output[,5] >= 0.25 &output[,5] < 0.3)) 18 | [1] 45121 19 | > length(which(output[,5] >= 0.3 &output[,5] < 0.35)) 20 | [1] 34408 21 | > length(which(output[,5] >= 0.35 &output[,5] < 0.4)) 22 | [1] 21774 23 | > length(which(output[,5] >= 0.4 &output[,5] < 0.45)) 24 | 8301 25 | > length(which(output[,5] >= 0.45 &output[,5] < 0.5)) 26 | 4469 27 | plot(b,ylim=c(0,0.5),xlim=c(1,9),cex=3) 28 | 29 | xlab2 <- c("0.00-0.01","0.01-0.02","0.02-0.03","0.03-0.04","0.04-0.05") 30 | length(which(output[,5] > 0.0 &output[,5] < 0.01)) 31 | 1631338 32 | length(which(output[,5] >= 0.01 &output[,5] < 0.02)) 33 | 292504 34 | length(which(output[,5] >= 0.02 &output[,5] < 0.03)) 35 | 119416 36 | length(which(output[,5] >= 0.03 &output[,5] < 0.04)) 37 | 45907 38 | length(which(output[,5] >= 0.04 &output[,5] < 0.05)) 39 | 21405 40 | a <- c(1631338,292504,119416,45907,21405) 41 | b <- a/2110570 42 | plot(b,ylim=c(0,1),xlim=c(1,5),cex=3) 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /01_Vmap1.1R/24_MergeTest.r: -------------------------------------------------------------------------------- 1 | setwd("/Users/guoyafei/Downloads/Results/ChangeFormat/test_merge") 2 | test.Exome <- read.table("test.Exome.vcf", head = FALSE, stringsAsFactors = FALSE) 3 | test.Trancing <- read.table("test.Trancing.vcf", head = FALSE, stringsAsFactors = FALSE) 4 | test.World <- read.table("test.World.vcf", head = FALSE, stringsAsFactors = FALSE) 5 | test.merge <- read.table("test.merge.vcf", head = FALSE, stringsAsFactors = FALSE) 6 | 7 | a <- paste(test.Trancing[,1],test.Trancing[,2],sep="_") 8 | b <- paste(test.World[,1],test.World[,2],sep="_") 9 | c <- paste(test.merge[,1],test.merge[,2],sep="_") 10 | d <- paste(test.Exome[,1],test.Exome[,2],sep="_") 11 | venn.diagram(x= list(FileT = a, FileE= d,FileW = b,Merge = c), 12 | filename = "merge.png", height = 450, width = 450,resolution =300, 13 | imagetype="png", col ="transparent", 14 | fill =c("cornflowerblue","green","yellow","darkorchid1"), 15 | alpha = 0.5, 16 | label.col = c("orange", "white","darkorchid4", "white", "white", "white", "white", "white","darkblue", "white", "white", "white","white", "darkgreen", "white"), 17 | cex = 0.45,fontfamily = "serif", fontface = "bold", 18 | cat.col =c("darkblue", "darkgreen", "orange","darkorchid4"), 19 | cat.cex = 0.45,cat.pos = 0, cat.dist = 0.07,cat.fontfamily = "serif", 20 | rotation.degree = 270) 21 | 22 | ab <- a[a%in%b] 23 | ad <- a[a%in%d] 24 | abd <- d[d%in%ab] 25 | a[-which(a%in%c)] -------------------------------------------------------------------------------- /01_Vmap1.1R/25_MergeVCF.r: -------------------------------------------------------------------------------- 1 | setwd("/Users/guoyafei/Downloads/Results/") 2 | W.pos <- read.table("World.pos",header=T,stringsAsFactors = F) 3 | B.pos <- read.table("sorted.pos",header=F,stringsAsFactors = F) 4 | colnames(B.pos) <- c("psid", "alle1", 
"alle2") 5 | World <- merge(W.pos, B.pos, by="psId",all.x=TRUE) 6 | rm(W.pos) 7 | 8 | Trancing <- read.table("Trancing.SNPs.pos",header = T,stringsAsFactors = F) 9 | Exome <- read.table("Exome.pos",header=F,stringsAsFactors = F) 10 | 11 | W <- paste(World[,2],World[,3],sep="") 12 | T <- paste(Trancing[,1],Trancing[,2],sep="") 13 | E <- paste(Exome[,1],Exome[,2],sep="") 14 | 15 | newW <- cbind(W,World[,1],World[,4],World[,5]) 16 | newW <- as.data.frame(newW) 17 | colnames(newW) <- c("chr","psid","alleWA","alleWB") 18 | newE <- cbind(E,Exome[,3],Exome[,4]) 19 | newE <- as.data.frame(newE) 20 | colnames(newE) <- c("chr","alleEA","alleEB") 21 | newT <- cbind(T,Trancing[,3],Trancing[,4]) 22 | newT <- as.data.frame(newT) 23 | colnames(newT) <- c("chr","alleTA","alleTB") 24 | rm(Exome) 25 | rm(Trancing) 26 | 27 | newWT <- merge(newW,newT,by="chr",all.x=TRUE) 28 | newWTE <- merge(newWT, newE,by="chr",all.x=TRUE) 29 | 30 | index <- apply(newWTE,1,function(x) length(which(is.na(x)))) 31 | new <- newWTE[which(index < 4),] 32 | G <- apply(new,1,function(x) length(which(x=="G"))) 33 | A <- apply(new,1,function(x) length(which(x=="A"))) 34 | T <- apply(new,1,function(x) length(which(x=="T"))) 35 | C <- apply(new,1,function(x) length(which(x=="C"))) 36 | 37 | new$G <- G 38 | new$A <- A 39 | new$T <- T 40 | new$C <- C 41 | 42 | delete <- new[apply(new,1,function(x) length(which(x==0))!=2),] 43 | select <- new[apply(new,1,function(x) length(which(x==0))==2),] 44 | 45 | for (i in 1:14089) { 46 | if (is.na(select[i,5])){ 47 | select[i,5] <- select[i,7] 48 | select[i,6] <- select[i,8] 49 | } 50 | } 51 | 52 | rm(A) 53 | rm(C) 54 | rm(E) 55 | rm(G) 56 | rm(i) 57 | rm(index) 58 | rm(T) 59 | rm(W) 60 | rm(new) 61 | rm(newE) 62 | rm(newT) 63 | rm(newW) 64 | rm(newWT) 65 | rm(newWTE) 66 | rm(World) 67 | 68 | newData <- select[,c(1,4,5)] 69 | 70 | WorldwideResult <- read.table("WorldwideResult",header=T,stringsAsFactors = F) 71 | 72 | deleteID <- as.character(delete[,2]) 73 | all <- WorldwideResult[,1] 74 | B.ID <- B.pos[,1] 75 | 76 | num1 <- which(all %in% deleteID) 77 | WorldFilterd <- WorldwideResult[-num1,] 78 | 79 | num2 <- which(B.ID %in% deleteID) 80 | Bpos.filted <- B.pos[-num2,] 81 | 82 | 83 | pos.merge <- merge(Bpos.filted, select, by="psid",all.x=TRUE) 84 | pos.merge[,7] <- as.character(pos.merge[,7]) 85 | pos.merge[,8] <- as.character(pos.merge[,8]) 86 | 87 | 88 | for (i in 1:102690) { 89 | if (!is.na(pos.merge[i,7])){ 90 | pos.merge[i,2] <- pos.merge[i,7] 91 | pos.merge[i,3] <- pos.merge[i,8] 92 | } 93 | } 94 | 95 | pos <- pos.merge[,c(1,2,3)] 96 | 97 | write.table(WorldFilterd, "WorldFilterdResult",quote = F,row.names = F) 98 | write.table(pos, "WorldFilterd.pos",quote = F,row.names = F) 99 | 100 | AA <- paste(pos[,2],pos[,2],sep="") 101 | AB <- paste(pos[,2],pos[,3],sep="") 102 | BB <- paste(pos[,3],pos[,3],sep="") 103 | 104 | all <- cbind(AA,AB,BB,WorldFilterd) 105 | write.table(all,file = "all.txt",quote = F,sep=" ",row.names = F) 106 | -------------------------------------------------------------------------------- /01_Vmap1.1R/27_Permut.r: -------------------------------------------------------------------------------- 1 | set.seed(98765) 2 | N <- 100 3 | r.mat <- matrix(rexp(N^2), ncol=N) 4 | cv.mat <- crossprod(r.mat) # a positive definite covariance matrix 5 | x.mat <- rep(1, N) 6 | r.mean <- 3 7 | r1 <- rnorm(N) # independent random noise 8 | r.data <- x.mat * r.mean + drop( t(chol(cv.mat)) %*% r1 ) # correlated random noise 9 | r.perm <- mvnpermute( r.data, x.mat, cv.mat, nr=1000, seed=1234) 
10 | est.cvm <- cov(t(r.perm)) 11 | plot(cv.mat, est.cvm); 12 | abline(0,1) 13 | -------------------------------------------------------------------------------- /01_Vmap1.1R/28_Pluk.r: -------------------------------------------------------------------------------- 1 | library(vegan) 2 | setwd("/Users/guoyafei/Downloads/200328-普鲁克分析(Procrustes Analysis)评估物种-环境或功能关联度的一个示例") 3 | #PCA降维及探索性分析 4 | ##样方-环境属性矩阵 5 | env <- read.delim('env_table.txt', row.names = 1, sep = '\t', stringsAsFactors = FALSE, check.names = FALSE) 6 | 7 | #环境变量的 PCA 需要标准化,详情 ?rda 8 | env_pca <- rda(env, scale = TRUE) 9 | 10 | ##样方-物种丰度矩阵 11 | otu <- read.delim('spe_table.txt', row.names = 1, sep = '\t', stringsAsFactors = FALSE, check.names = FALSE) 12 | 13 | #物种数据 Hellinger 预转化 14 | otu_hel <- decostand(otu, method = 'hellinger') 15 | 16 | #对转化后的物种数据执行 PCA,无需标准化 17 | otu_pca <- rda(otu_hel, scale = FALSE) 18 | 19 | ##排序图比较,以 PCA 的 I 型标尺为例 20 | par(mfrow = c(1, 2)) 21 | biplot(env_pca, choices = c(1, 2), scaling = 1, 22 | main = '环境组成的PCA', col = c('red', 'blue')) 23 | biplot(otu_pca, choices = c(1, 2), scaling = 1, 24 | main = '物种组成的PCA', col = c('red', 'blue')) 25 | #Procrustes分析度量两数据集一致性 26 | #Procrustes 分析 27 | #提取两个 PCA 中的样方排序坐标,均以 I 型标尺为例 28 | site_env <- summary(otu_pca, scaling = 1)$site 29 | site_otu <- summary(otu_pca, scaling = 1)$site 30 | 31 | #执行 Procrustes 分析,详情 ?procrustes 32 | #以对称分析为例(symmetric = TRUE) 33 | proc <- procrustes(X = env_pca, Y = otu_pca, symmetric = TRUE) 34 | summary(proc) 35 | 36 | #旋转图 37 | plot(proc, kind = 1, type = 'text') 38 | 39 | #一些重要的结果提取 40 | names(proc) 41 | head(proc$Yrot) #Procrustes 分析后 Y 的坐标 42 | head(proc$X) #Procrustes 分析后 X 的坐标 43 | proc$ss #偏差平方和 M2 统计量 44 | proc$rotation #通过该值可获得旋转轴的坐标位置 45 | #残差图 46 | plot(proc, kind = 2) 47 | residuals(proc) #残差值 48 | #PROTEST 检验,详情 ?protest 49 | #以 999 次置换为例 50 | #注:protest() 中执行的是对称 Procrustes 分析,X 和 Y 的分配调换不影响 M2 统计量的计算 51 | set.seed(123) 52 | prot <- protest(X = env_pca, Y = otu_pca, permutations = how(nperm = 999)) 53 | prot 54 | 55 | #重要统计量的提取 56 | names(prot) 57 | prot$signif #p 值 58 | prot$ss #偏差平方和 M2 统计量 59 | library(ggplot2) 60 | 61 | #提取 Procrustes 分析的坐标 62 | Y <- cbind(data.frame(proc$Yrot), data.frame(proc$X)) 63 | X <- data.frame(proc$rotation) 64 | 65 | #添加分组信息 66 | group <- read.delim('group.txt', sep = '\t', stringsAsFactors = FALSE, check.names = FALSE) 67 | Y$samples <- rownames(Y) 68 | Y <- merge(Y, group, by = 'samples') 69 | #---------------------------------------------------- 70 | #ggplot2 作图 71 | p <- ggplot(Y) + 72 | geom_point(aes(X1, X2, color = groups), size = 1.5, shape = 16) + 73 | geom_point(aes(PC1, PC2, color = groups), size = 1.5, shape = 1) + 74 | scale_color_manual(values = c('red2', 'purple2', 'green3'), limits = c('A', 'B', 'C')) + 75 | geom_segment(aes(x = X1, y = X2, xend = PC1, yend = PC2), arrow = arrow(length = unit(0.1, 'cm')), 76 | color = 'blue', size = 0.3) + 77 | theme(panel.grid = element_blank(), panel.background = element_rect(color = 'black', fill = 'transparent'), 78 | legend.key = element_rect(fill = 'transparent')) + 79 | labs(x = 'Dimension 1', y = 'Dimension 2', color = '') + 80 | geom_vline(xintercept = 0, color = 'gray', linetype = 2, size = 0.3) + 81 | geom_hline(yintercept = 0, color = 'gray', linetype = 2, size = 0.3) + 82 | geom_abline(intercept = 0, slope = X[1,2]/X[1,1], size = 0.3) + 83 | geom_abline(intercept = 0, slope = X[2,2]/X[2,1], size = 0.3) + 84 | annotate('text', label = sprintf('M^2 == 0.2178'), 85 | x = -0.21, y = 0.42, size = 3, parse = TRUE) + 86 | 
annotate('text', label = 'P < 0.001', 87 | x = -0.21, y = 0.38, size = 3, parse = TRUE) 88 | 89 | p 90 | 91 | #输出图片 92 | ggsave('procrustes.pdf', p, width = 6, height = 5) 93 | ggsave('procrustes.png', p, width = 6, height = 5) -------------------------------------------------------------------------------- /01_Vmap1.1R/29_Statistic.r: -------------------------------------------------------------------------------- 1 | setwd("/Users/guoyafei/Documents/lab/EGWAS/forYafei/") 2 | #data <- read.csv("R1_count_statics.txt",header=T,stringsAsFactors = F,sep="\t") 3 | #data <- read.csv("bam_size.txt",header=T,stringsAsFactors = F,sep="\t") 4 | data <- read.csv("pileup_statics.txt",header=T,stringsAsFactors = F,sep="\t") 5 | library(ggplot2) 6 | #ggplot(data,aes(Reads.count))+geom_histogram(binwidth=0.01) 7 | 8 | data$Taxon.name.5700 <- as.factor(data$Taxon.name.5700) 9 | data$Flowcell.ID <- as.factor(data$Flowcell.ID) 10 | data$Lane <- as.factor(data$Lane) 11 | data$Library.ID <- as.factor(data$Library.ID) 12 | data$Company <- as.factor(data$Company) 13 | 14 | cbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7") 15 | #直方图 16 | 17 | facet_grid(Company~.)+ 18 | scale_color_manual(values=cbPalette) 19 | 20 | p+labs(x = "log(Reads count)", y = "Individual count", title = "Reads count at SNP sites in 5700 Individuals")+ 21 | theme(axis.text.x = element_text(size = 16),axis.text.y = element_text(size = 16),plot.title = element_text(hjust = 0.5,vjust=0.5,size=20),axis.title.x = element_text(size = 17),axis.title.y = element_text(size = 17))+ 22 | theme(panel.border = element_blank(),panel.grid.major = element_blank(),panel.grid.minor = element_blank(),axis.line = element_line(colour = "black"))+ 23 | geom_vline(aes(xintercept=2.4548), colour="#990000", linetype="dashed")+ 24 | geom_vline(aes(xintercept=2.7559), colour="#E69F00", linetype="dashed") 25 | 26 | #箱线图 27 | pbox <- ggplot(data,aes(Company,Sequencing_error,color=Company))+ 28 | geom_boxplot(outlier.colour="red", outlier.shape=8, outlier.size=4)+ 29 | scale_color_manual(values=cbPalette)+ 30 | geom_jitter(shape=16, position=position_jitter(0.2))+ 31 | labs(title = "Error Frrequency Distribution of 124 Libraries")+ 32 | theme(axis.text.x = element_text(size = 16),axis.text.y = element_text(size = 16),plot.title = element_text(hjust = 0.5,vjust=0.5,size=20),axis.title.x = element_text(size = 17),axis.title.y = element_text(size = 17),legend.text = element_text(size = 13))+ 33 | theme(panel.border = element_blank(),panel.grid.major = element_blank(),panel.grid.minor = element_blank(),axis.line = element_line(colour = "black")) 34 | 35 | #多因素方差分析 36 | Result<-aov(Reads.count~Library.ID+Company+Library.ID:Company,data=data) 37 | anova(Result) 38 | interaction.plot(data$Company,data$Library.ID,data$log.bam_Mb.,type="b",legend =F,xlab="Company",ylab="log bamfile size(Mb)",main="The bamfile size(Mb) Distribution of 11,976 Individuals",cex.lab=1.5, cex.axis=1.5, cex.main=1.7, cex.sub=1.5) 39 | #单因素方差分析 40 | OneWay<-aov(Reads.count~Company,data=my) 41 | anova(OneWay) 42 | summary(OneWay) 43 | 44 | library("gplots") 45 | plotmeans(log.Reads.count~Company,data=data,p=0.95,use.t=TRUE) 46 | 47 | 48 | -------------------------------------------------------------------------------- /01_Vmap1.1R/30_Wenn.r: -------------------------------------------------------------------------------- 1 | **统计每个文件里的未分离位点 2 | setwd("/data1/home/yafei/Project3/Download") 3 | EC <- read.table("new_EC",header=T,stringsAsFactors=F) 4 
| TR <- read.table("new_TR",header=T,stringsAsFactors=F) 5 | WW <- read.table("new_WW",header=T,stringsAsFactors=F) 6 | 7 | bing <- read.table("/data1/home/yafei/Project3/Vmap1.1/out.txt",header=F,stringsAsFactors=F) 8 | EC_id <- EC[,3] 9 | WW_id <- WW[,3] 10 | TR_id <- TR[,3] 11 | 12 | noSep_EC <- read.table("EC_noSep.txt",header=F,stringsAsFactors=F) 13 | noSep_TR <- read.table("TR_noSep.txt",header=F,stringsAsFactors=F) 14 | noSep_WW <- read.table("WW_noSep.txt",header=F,stringsAsFactors=F) 15 | 16 | #1. 算各文件的分离位点 17 | EC_sep <- EC_id[-which(EC_id %in% noSep_EC[,1])] 18 | WW_sep <- WW_id[-which(WW_id %in% noSep_WW[,1])] 19 | TR_sep <- TR_id[-which(TR_id %in% noSep_TR[,1])] 20 | #2. 求其他文件的分离位点 21 | TR_WW_V <- c(TR_sep,WW_sep,bing[,1]) 22 | EC_WW_V <- c(EC_sep,WW_sep,bing[,1]) 23 | EC_TR_V <- c(EC_sep,TR_sep,bing[,1]) 24 | #3. 求实际各文件的非分离位点 25 | noSep_real_EC <- noSep_EC[-which(noSep_EC[,1] %in% TR_WW_V),1] 26 | noSep_real_TR <- noSep_TR[-which(noSep_TR[,1] %in% EC_WW_V),1] 27 | noSep_real_WW <- noSep_WW[-which(noSep_WW[,1] %in% EC_TR_V),1] 28 | #4. 求实际各文件的分离位点 29 | Sep_real_EC <- EC_id[-which(EC_id %in% noSep_real_EC)] 30 | Sep_real_TR <- TR_id[-which(TR_id %in% noSep_real_TR)] 31 | Sep_real_WW <- WW_id[-which(WW_id %in% noSep_real_WW)] 32 | #画韦恩图 33 | #三个文件 34 | Length_EC<-length(Sep_real_EC) 35 | Length_TR<-length(Sep_real_TR) 36 | Length_WW<-length(Sep_real_WW) 37 | Length_EC_TR<-length(intersect(Sep_real_EC,Sep_real_TR)) 38 | Length_WW_TR<-length(intersect(Sep_real_WW,Sep_real_TR)) 39 | Length_EC_WW<-length(intersect(Sep_real_EC,Sep_real_WW)) 40 | library(VennDiagram) 41 | T<-venn.diagram(list(EC_86=Sep_real_EC,TR_28=Sep_real_TR,WW_741=Sep_real_WW),filename=NULL,lwd=1,lty=2,col=c('red','green','blue'),fill=c('red','green','blue'),cat.col=c('red','green','blue'),reverse=TRUE) 42 | pdf("Venn.pdf") 43 | grid.draw(T) 44 | dev.off() 45 | #两个文件 46 | pdf("Venn2.pdf") 47 | draw.pairwise.venn(area1=Length_A,area2=Length_B,cross.area=Length_AB,category=c('A','B'),lwd=rep(1,1),lty=rep(2,2),col=c('red','green'),fill=c('red','green'),cat.col=c('red','green'),rotation.degree=90) 48 | dev.off() 49 | 50 | write.table(Sep_real_EC,"Sep_real_EC", quote=F,row.names=F) 51 | write.table(Sep_real_TR,"Sep_real_TR", quote=F,row.names=F) 52 | write.table(Sep_real_WW,"Sep_real_WW", quote=F,row.names=F) 53 | 54 | #输出hapscanner的输入文件:pos.txt & posAllele.txt 55 | Real_WW <- read.table("Sep_real_WW", header=F,stringsAsFactors=F) 56 | selec <- WW[which(WW$ID %in% Real_WW[,1]),] 57 | posAllele <- selec[,c(1,2,4,5)] 58 | colnames(posAllele) <- c("Chr","Pos","Ref","Alt") 59 | pos <- selec[,c(1,2)] 60 | colnames(pos) <- c("Chr","Pos") 61 | 62 | for(i in 1:42){ 63 | a <- posAllele[which(posAllele$Chr == i),] 64 | fileA <- paste("/data1/home/yafei/Project3/Download/Hapscanner/chr",i,"_posAllele.txt",sep="") 65 | write.table(a,fileA,row.names=F,quote=F,sep="\t") 66 | b <- pos[which(pos$Chr == i),] 67 | fileB <- paste("/data1/home/yafei/Project3/Download/Hapscanner/chr",i,"_pos.txt",sep="") 68 | write.table(b,fileB,row.names=F,quote=F,sep="\t") 69 | } 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /01_Vmap1.1R/31_WheatMap.r: -------------------------------------------------------------------------------- 1 | data <- read.table("Down+Vmap.txt",header=T,stringsAsFactors=F) 2 | library(ggmap) 3 | library(ggplot2) 4 | library(sp) 5 | library(maptools) 6 | library(maps) 7 | register_google(key = "AIzaSyAFnTwa7kEJAqIeHU-Kw6qPZiPUU-zXCLI") 8 | 
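# NOTE (added sketch, not part of the original script): `caseAll` is used below
# but never constructed in this file. Presumably it is the accession table
# aggregated per collection site with a `count` column; a hypothetical sketch,
# assuming Down+Vmap.txt carries Logititude/Latitude columns:
caseAll <- aggregate(list(count = rep(1, nrow(data))),
                     by = list(Logititude = data$Logititude, Latitude = data$Latitude),
                     FUN = sum)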
visit.x<-as.numeric(caseAll$Logititude) 9 | visit.y<-as.numeric(caseAll$Latitude) 10 | mp<-NULL #定义一个空的地图 11 | mapworld<-borders("world",colour = "gray50",fill="white") #绘制基本地图 12 | mp<-ggplot()+mapworld+ylim(-60,90) 13 | #利用ggplot呈现,同时地图纵坐标范围从-60到90 14 | mp2<-mp+geom_point(aes(x=visit.x,y=visit.y, size=caseAll$count),color="darkorange")+scale_size(range=c(1,1)) 15 | #绘制带点的地图,geom_point是在地图上绘制点,x轴为经度信息,y轴为纬度信息,size是将点的大小按照收集的个数确定,color为暗桔色,scale_size是将点变大一些 16 | mp3<-mp2+labs(x="Longitude", y="Latitude")+ 17 | theme(legend.position = "none",panel.background = element_blank(),axis.title.x = element_text(size = 25),axis.title.y = element_text(size = 25),axis.text.x = element_text(size = 25),axis.text.y = element_text(size = 25),panel.border = element_blank()) #将图例去掉 18 | pdf("castAll.pdf",width=20,height=8) 19 | mp3 20 | dev.off() 21 | 22 | 23 | ###画世界地图### 24 | data <- read.table("/Users/guoyafei/Desktop/dataset_accessions_WGS.txt",header=T,stringsAsFactors = F) 25 | library(maps) 26 | data$color <- NA 27 | #map1 28 | data[which(data$Region == "AF"), 11 ] <- "darkblue" 29 | data[which(data$Region == "AM"), 11 ] <- "brown" 30 | data[which(data$Region == "EA"), 11 ] <- "purple" 31 | data[which(data$Region == "EU"), 11 ] <- "yellow" 32 | data[which(data$Region == "SCA"), 11 ] <- "red" 33 | data[which(data$Region == "WA"), 11 ] <- "green" 34 | pdf(file = "map1.pdf",width=14,height=7) 35 | map(database="world",fill = T,col = "grey",interior = F,border=NA) 36 | points(x = data$Logititude, y = data$Latitude, col =data$color, pch=20,cex=0.5) 37 | legend("right",legend = c("AF","AM","EA","EU","SCA","WA"),col = c("darkblue","brown","purple","yellow","red","green",rgb(0,255/255,255/255),rgb(255/255,0,255/255)),pch=20,cex=1,box.lty=0) 38 | dev.off() 39 | #map2 40 | data[which(data$Genome == "SS"), 12 ] <- "green" 41 | data[which(data$Genome == "AA"), 12 ] <- "brown" 42 | data[which(data$Genome == "DD"), 12 ] <- "purple" 43 | data[which(data$Genome == "AABB"), 12 ] <- "yellow" 44 | data[which(data$Genome == "AABBDD"), 12 ] <- "red" 45 | pdf(file = "map2.pdf",width=14,height=7) 46 | map(database="world",fill = T,col = "grey",interior = F,border=NA) 47 | points(x = data$Logititude, y = data$Latitude, col =data$color, pch=20,cex=0.5) 48 | legend("right",legend = c("SS","AA","DD","AABB","AABBDD"),col = c("green","brown","purple","yellow","red",rgb(0,255/255,255/255),rgb(255/255,0,255/255)),pch=20,cex=1,box.lty=0) 49 | dev.off() 50 | 51 | -------------------------------------------------------------------------------- /01_Vmap1.1R/32_NeSize.r: -------------------------------------------------------------------------------- 1 | library(RColorBrewer) 2 | #color <- brewer.pal(5, "RdBu")[c(2,5)] 3 | #color <- append(color,brewer.pal(5, "PuOr")[c(2,5)]) 4 | #brewer.pal(5, "PiYG")[c(2,5)] 5 | #brewer.pal(11, "RdYlBu")[1] 6 | color <- c(brewer.pal(5, "RdBu")[c(2,5)],brewer.pal(5, "PuOr")[c(2,5)],brewer.pal(5, "PiYG")[c(2,5)],brewer.pal(11, "RdYlBu")[c(10,11)]) 7 | setwd("/Users/guoyafei/Documents/01_个人项目/02_Migration/02_数据表格/01_Vmap1-1/01_Add_ZNdata/05_Environment/Ne") 8 | 9 | setwd("~/Desktop/") 10 | ####改格式 11 | library(RColorBrewer) 12 | library(cowplot) 13 | library(viridis) 14 | display.brewer.all() 15 | brewer.pal(12,'Set3') 16 | brewer.pal(7,'PuRd') 17 | brewer.pal(7,'Blues') 18 | brewer.pal(7,'OrRd') 19 | brewer.pal(7,'Greens') 20 | a = seq(1:10) 21 | #cols = viridis(8) 22 | cols <- color 23 | plot(NULL,xlim = c(2,3),ylim = c(0,8),axes=F,xlab="",ylab="") 24 | #set.seed(2) 25 | #rom = sample(1:14,14,replace 
= F) 26 | j = 0 27 | for (i in (1:8)){ 28 | j=j+1 29 | lines(a,rep(j,10),type="l",col=cols[i],lwd = 19) 30 | } 31 | #col1 = c("#66C2A5", "#FC8D62", "#8DA0CB","#FFD92F","#E78AC3","#07dede","#E5C494") 32 | #col2 = c("#f15a24","#779970","#8dd3c7","#fcee21","#ba9bc9","#619cff","#c69c6d") 33 | col1 = c("#f5e1fa", "#C6DBEF", "#FDD49E","#9FDA3AFF","#8dd3c7","#277F8EFF","#FC8D59") 34 | #setwd("/Users/xuebozhao/Documents/LuLab/wheatSpeciation/smcpp/getpoints") 35 | dataf = read.table("/Volumes/My Passport/01_VMap1.1/02_Environment/03_Ne/count2.txt",header = T) 36 | unique(dataf$Subspecies) 37 | datafd <- dataf[which(dataf$Subspecies != "Urartu" & dataf$Subspecies != "Speltoilds" & dataf$Time <=10000),] 38 | #datafd$Subspecies = factor(datafd$Subspecies,levels=c("Urartu","Speltoilds","Strangulata","Wild_emmer", 39 | # "Domesticated_emmer","Free_threshing","Bread_wheat")) 40 | 41 | datafd$Subspecies = factor(datafd$Subspecies,levels=c("Strangulata","Wild_emmer", 42 | "Domesticated_emmer","Free_threshing","Bread_wheat")) 43 | 44 | 45 | #col1 <- c(brewer.pal(10, "RdBu")[c(3,4,5,7,8,9)],brewer.pal(8, "Set1")[6]) 46 | 47 | col1 <- c(brewer.pal(10, "RdBu")[c(3,4,7,8)],brewer.pal(8, "Set1")[6]) 48 | 49 | col1 <- c(brewer.pal(5, "RdBu")[5],brewer.pal(5, "PuOr")[5],brewer.pal(5, "PiYG")[5],brewer.pal(5, "RdBu")[2],brewer.pal(5, "PuOr")[2],brewer.pal(5, "PiYG")[2],brewer.pal(11, "RdYlBu")[11]) 50 | 51 | p1 = ggplot(datafd, aes(x = Time,y = Contribution,fill = Subspecies))+ 52 | ####position="stack"堆叠状 53 | ####position="stack" 改为"postion_stack(reverse=T) 反向堆叠 54 | geom_bar(stat ="identity",width = 800,position ="stack")+ 55 | scale_fill_manual(values = col1)+ 56 | labs(x = "",y = "All effective population size",size = rel(1)) + 57 | #geom_text(aes(label = datafd$Num),position=position_stack(vjust =0.5),size = 5)+ 这行代码是显示个数的数字 58 | guides(fill = guide_legend(reverse = F))+ 59 | theme_bw()+ 60 | theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1,size = rel(0.8))) + 61 | theme(panel.grid=element_blank(),panel.border=element_blank(),axis.line=element_line(size=0.5,colour="black")) 62 | 63 | ggplot(datafd, aes(x = Time,y = Contribution,fill = Subspecies))+ 64 | ####position="stack"堆叠状 65 | ####position="stack" 改为"postion_stack(reverse=T) 反向堆叠 66 | geom_bar(stat ="identity",width = 800,position ="fill")+ 67 | scale_fill_manual(values = col1)+ 68 | labs(x = "",y = "All effective population size",size = rel(1)) + 69 | #geom_text(aes(label = datafd$Num),position=position_stack(vjust =0.5),size = 5)+ 这行代码是显示个数的数字 70 | guides(fill = guide_legend(reverse = F))+ 71 | theme_bw()+ 72 | theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1,size = rel(0.8))) + 73 | theme(panel.grid=element_blank(),panel.border=element_blank(),axis.line=element_line(size=0.5,colour="black")) 74 | -------------------------------------------------------------------------------- /01_Vmap1.1R/33_vf_pipe.sh: -------------------------------------------------------------------------------- 1 | #############################################volcano Finder####################################### 2 | # VolcanoFinder is a program to perform genome-wide scans of adaptive introgression by using the 3 | # spatial distribution of within-species polymorphism and between species substitution in the genome. 
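# (added note, not part of the original pipeline description)
# Overall flow of this script: (1) count REF/ALT alleles per site with
# vcftools --counts, (2) polarize those counts against the ancestral-state
# BED files to build FreqFile, (3) collapse FreqFile into the site frequency
# spectrum (SpectFile), then run VolcanoFinder in blocks and merge the output.
# The SFS step below re-reads FreqFile once per allele count (1..604); a
# hedged single-pass alternative, assuming the same headerless FreqFile
# layout and the 104072 sites quoted below, would be:
# awk '{c[$2]++} END{for(i=1;i<=604;i++) print i"\t"c[i]/104072}' FreqFile > SpectFile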
4 | # input: 1.Allele frequency file 2.site frequency spectrum(sfs)3.ancestral file 5 | ##工作路径: /data1/home/xuebo/Projects/Speciation/volcanofinder 6 | 7 | ###### installation ###### 8 | wget -c http://degiorgiogroup.fau.edu/vf.html 9 | tar -xzvf volcanofinder_v1.0.tar.gz 10 | cd volcanofinder_v1.0 11 | make 12 | vim ~/.bashrc 13 | source ~/.bashrc 14 | 15 | ###### prepare allele frequency file ###### 16 | # 1.calculate allele numbers of ref and al 17 | # ex:chr1 821443 821444 T 1137 C 3 18 | vcftools --gzvcf /data1/home/xuebo/Projects/Speciation/E3/chr36.all.vcf.gz --counts --out chr36_part1 19 | cat chr36_part1.frq.count |tail -n +2 |awk '{print $1"\t"$2-1"\t"$2"\t"substr($5,1,1)"\t"substr($5,3,length($5))"\t"substr($6,1,1)"\t"substr($6,3,length($6))}'>chr36_part1.temp 20 | # 2.extract and non-polarized sites to obtain Freq file 21 | # ex:position x n folded 22 | # 460000 9 100 0 23 | # 460010 100 100 0 24 | # 460210 30 78 1 25 | awk '{print $1"\t"$2-1"\t"$2"\t"$3}' A_pos_ances.txt > A_pos_ances.bed 26 | awk '{print $1"\t"$2-1"\t"$2"\t"$3}' B_pos_ances.txt > B_pos_ances.bed 27 | awk '{print $1"\t"$2-1"\t"$2"\t"$3}' D_pos_ances.txt > D_pos_ances.bed 28 | mkdir -p Dsplit_ances 29 | for chr in `cut -f 1 D_pos_ances.bed | sort | uniq`; do 30 | grep -w $chr D_pos_ances.bed > Dsplit_ances/$chr.pos_ances.bed 31 | done 32 | ***intersect,取交集,并且找出谁是ancestral,格式是pos\tderived\tderived+ancestral 33 | bedtools intersect -a ../anc/Dsplit_ances/36.pos_ances.bed -b chr36_part1.temp -wo |awk '{if($4==$8)print $2"\t"$11"\t"$11+$9"\t""0";else print $2"\t"$9"\t"$9+$11"\t""0" }' >polarized.freq 34 | ***找出-a里面的特有的pos 35 | bedtools subtract -b ../anc/Dsplit_ances/36.pos_ances.bed -a chr36_part1.temp |awk '{print $2"\t"$7"\t"$5+$7"\t""1"}' >>polarized.freq 36 | # remove undistinguished sites 37 | awk '{if($2==0)print $0 }' polarized.freq >x 38 | awk '{if($2==$3&&$4==1)print $0 }' polarized.freq >>x 39 | sort -k1,1n x>xx 40 | awk 'FNR==NR{a[$0]++;next}(!($0 in a))' xx polarized.freq >FreqFile 41 | sort -k1,1n FreqFile |sed '1i position\tx\tn\tfolded' > FreqFilesort 42 | # 3.sum to sfs Spect file 43 | # 604 is alleles number 44 | for var in {1..604};do awk -v j=$var '{if($2==j)print $2}' FreqFile| wc -l >> tempp ; done 45 | 46 | 47 | # 104072 is snp number zcat /data1/home/xuebo/Projects/Speciation/E3/chr36.all.vcf.gz | grep -v "#" | wc -l 48 | awk '{print NR"\t"$1/104072}' tempp >SpectFile 49 | 50 | ###### run VolcanoFinder by blocks ####### 51 | # g:block length(ex:1000bp) G:step length(ex:1000snp) 52 | #ex:VolcanoFinder -big 1000 FreqFile SpectFile -1 1 1 xbsj 1 50 53 | #VolcanoFinder -big g FreqFile SpectFile D P MODEL OutFile BLOCK NBLOCK 54 | #VolcanoFinder -bi G FreqFile SpectFile D P MODEL OutFile BLOCK NBLOCK 55 | VolcanoFinder -big 100 FreqFilesort SpectFile -1 1 1 xbsj 1 50 56 | 57 | # merge results 58 | #VolcanoFinder -m xbsj 50 59 | VolcanoFinder -m OutFile NBLOCK -------------------------------------------------------------------------------- /01_Vmap1.1R/34_Vmap3.sh: -------------------------------------------------------------------------------- 1 | #Vmap3 2 | #yafei@204:/data2/yafei/Vmap3/sort 3 | #204: /data3/wgs/fastq/wildEmmer_S35 4 | zcat /data3/wgs/fastq/wildEmmer_S35/Rawdata/SFS1_1.fq.gz | paste - - - - | sort -k1,1 -t " " | tr '\t' '\n' | gzip -c > /data2/yafei/Vmap3/sort/SFS1_1.sorted.fq.gz 5 | zcat /data3/wgs/fastq/wildEmmer_S35/Rawdata/SFS1_2.fq.gz | paste - - - - | sort -k1,1 -t " " | tr '\t' '\n' | gzip -c > /data2/yafei/Vmap3/sort/SFS1_2.sorted.fq.gz 6 | 7 | zcat 
/data1/home/liping/gasdata/cleandata/CRR061687_f1.filtered.fq.gz |grep "@" |sort |uniq -c > /data2/liping/Vmap3/CRR061687_f1.id 8 | zcat /data1/home/liping/gasdata/cleandata/CRR061687_r2.filtered.fq.gz |grep "@" |sort |uniq -c > /data2/liping/Vmap3/CRR061687_r2.id 9 | zcat /data1/home/liping/gasdata/cleandata/CRR061717_f1.filtered.fq.gz |grep "@" |sort |uniq -c > /data2/liping/Vmap3/CRR061717_f1.id 10 | zcat /data1/home/liping/gasdata/cleandata/CRR061717_r2.filtered.fq.gz |grep "@" |sort |uniq -c > /data2/liping/Vmap3/CRR061717_r2.id 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /01_Vmap1.1R/35_ChrBin.R: -------------------------------------------------------------------------------- 1 | library(CMplot) 2 | setwd("/Users/guoyafei/Documents/个人项目/Project-4-VmapIII/Fastcall2/测试数据/") 3 | 4 | mydata<-read.table("/Users/guoyafei/Documents/个人项目/Project-4-VmapIII/Fastcall2/测试数据/fastcall2_001_pos.txt",header=TRUE,sep="\t") 5 | head(mydata) 6 | # snp chr pos 7 | # snp1_1 1 2041 8 | # snp1_2 1 2062 9 | # snp1_3 1 2190 10 | CMplot(mydata,plot.type="d",bin.size=1e4,col=c("darkgreen","yellow", "red"),file="jpg",memo="snp_density",dpi=300) 11 | mydata<-read.table("/Users/guoyafei/Documents/个人项目/Project-4-VmapIII/Fastcall2/测试数据/fastcall_001_pos.txt",header=TRUE,sep="\t") 12 | head(mydata) 13 | # snp chr pos 14 | # snp1_1 1 2041 15 | # snp1_2 1 2062 16 | # snp1_3 1 2190 17 | CMplot(mydata,plot.type="d",bin.size=1e4,col=c("darkgreen","yellow", "red"),file="jpg",memo="snp_density",dpi=300) 18 | -------------------------------------------------------------------------------- /01_Vmap1.1R/36_LDJump.R: -------------------------------------------------------------------------------- 1 | require(LDJump) 2 | results = LDJump("/data1/home/yafei/LDJump-test/HatLandscapeN16Len1000000Nrhs15_th0.01_540_1.fa", alpha = 0.05, segLength = 1000, pathLDhat = "/data1/home/yafei/Software/LDhat-master/", pathPhi = "/data1/home/yafei/Software/PhiPack/Phi", format = "fasta", refName = NULL) 3 | postscript("Results.eps", horiz = F) 4 | plot(results[[1]], xlab = "Segments", ylab = "Estimated Recombination Rate", main = "Estimated recombination map with LDJump") 5 | dev.off() -------------------------------------------------------------------------------- /01_Vmap1.1R/37_permut.R: -------------------------------------------------------------------------------- 1 | set.seed(98765) 2 | N <- 100 3 | r.mat <- matrix(rexp(N^2), ncol=N) 4 | cv.mat <- crossprod(r.mat) # a positive definite covariance matrix 5 | x.mat <- rep(1, N) 6 | r.mean <- 3 7 | r1 <- rnorm(N) # independent random noise 8 | r.data <- x.mat * r.mean + drop( t(chol(cv.mat)) %*% r1 ) # correlated random noise 9 | r.perm <- mvnpermute( r.data, x.mat, cv.mat, nr=1000, seed=1234) 10 | est.cvm <- cov(t(r.perm)) 11 | plot(cv.mat, est.cvm); 12 | abline(0,1) 13 | -------------------------------------------------------------------------------- /01_Vmap1.1R/38_Venn.R: -------------------------------------------------------------------------------- 1 | **统计每个文件里的未分离位点 2 | setwd("/data1/home/yafei/Project3/Download") 3 | EC <- read.table("new_EC",header=T,stringsAsFactors=F) 4 | TR <- read.table("new_TR",header=T,stringsAsFactors=F) 5 | WW <- read.table("new_WW",header=T,stringsAsFactors=F) 6 | 7 | bing <- read.table("/data1/home/yafei/Project3/Vmap1.1/out.txt",header=F,stringsAsFactors=F) 8 | EC_id <- EC[,3] 9 | WW_id <- WW[,3] 10 | TR_id <- TR[,3] 11 | 12 | noSep_EC <- 
read.table("EC_noSep.txt",header=F,stringsAsFactors=F) 13 | noSep_TR <- read.table("TR_noSep.txt",header=F,stringsAsFactors=F) 14 | noSep_WW <- read.table("WW_noSep.txt",header=F,stringsAsFactors=F) 15 | 16 | #1. segregating sites within each dataset 17 | EC_sep <- EC_id[-which(EC_id %in% noSep_EC[,1])] 18 | WW_sep <- WW_id[-which(WW_id %in% noSep_WW[,1])] 19 | TR_sep <- TR_id[-which(TR_id %in% noSep_TR[,1])] 20 | #2. sites that segregate in the other datasets 21 | TR_WW_V <- c(TR_sep,WW_sep,bing[,1]) 22 | EC_WW_V <- c(EC_sep,WW_sep,bing[,1]) 23 | EC_TR_V <- c(EC_sep,TR_sep,bing[,1]) 24 | #3. sites that are truly non-segregating in each dataset 25 | noSep_real_EC <- noSep_EC[-which(noSep_EC[,1] %in% TR_WW_V),1] 26 | noSep_real_TR <- noSep_TR[-which(noSep_TR[,1] %in% EC_WW_V),1] 27 | noSep_real_WW <- noSep_WW[-which(noSep_WW[,1] %in% EC_TR_V),1] 28 | #4. sites that are truly segregating in each dataset 29 | Sep_real_EC <- EC_id[-which(EC_id %in% noSep_real_EC)] 30 | Sep_real_TR <- TR_id[-which(TR_id %in% noSep_real_TR)] 31 | Sep_real_WW <- WW_id[-which(WW_id %in% noSep_real_WW)] 32 | #draw Venn diagrams 33 | #three datasets 34 | Length_EC<-length(Sep_real_EC) 35 | Length_TR<-length(Sep_real_TR) 36 | Length_WW<-length(Sep_real_WW) 37 | Length_EC_TR<-length(intersect(Sep_real_EC,Sep_real_TR)) 38 | Length_WW_TR<-length(intersect(Sep_real_WW,Sep_real_TR)) 39 | Length_EC_WW<-length(intersect(Sep_real_EC,Sep_real_WW)) 40 | library(VennDiagram) 41 | T<-venn.diagram(list(EC_86=Sep_real_EC,TR_28=Sep_real_TR,WW_741=Sep_real_WW),filename=NULL,lwd=1,lty=2,col=c('red','green','blue'),fill=c('red','green','blue'),cat.col=c('red','green','blue'),reverse=TRUE) 42 | pdf("Venn.pdf") 43 | grid.draw(T) 44 | dev.off() 45 | #two datasets (example pair: EC vs TR, using the lengths computed above) 46 | pdf("Venn2.pdf") 47 | draw.pairwise.venn(area1=Length_EC,area2=Length_TR,cross.area=Length_EC_TR,category=c('EC','TR'),lwd=rep(1,1),lty=rep(2,2),col=c('red','green'),fill=c('red','green'),cat.col=c('red','green'),rotation.degree=90) 48 | dev.off() 49 | 50 | write.table(Sep_real_EC,"Sep_real_EC", quote=F,row.names=F) 51 | write.table(Sep_real_TR,"Sep_real_TR", quote=F,row.names=F) 52 | write.table(Sep_real_WW,"Sep_real_WW", quote=F,row.names=F) 53 | 54 | #write the hapscanner input files: pos.txt & posAllele.txt 55 | Real_WW <- read.table("Sep_real_WW", header=F,stringsAsFactors=F) 56 | selec <- WW[which(WW$ID %in% Real_WW[,1]),] 57 | posAllele <- selec[,c(1,2,4,5)] 58 | colnames(posAllele) <- c("Chr","Pos","Ref","Alt") 59 | pos <- selec[,c(1,2)] 60 | colnames(pos) <- c("Chr","Pos") 61 | 62 | for(i in 1:42){ 63 | a <- posAllele[which(posAllele$Chr == i),] 64 | fileA <- paste("/data1/home/yafei/Project3/Download/Hapscanner/chr",i,"_posAllele.txt",sep="") 65 | write.table(a,fileA,row.names=F,quote=F,sep="\t") 66 | b <- pos[which(pos$Chr == i),] 67 | fileB <- paste("/data1/home/yafei/Project3/Download/Hapscanner/chr",i,"_pos.txt",sep="") 68 | write.table(b,fileB,row.names=F,quote=F,sep="\t") 69 | } 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /01_Vmap1.1R/43_est.sh: -------------------------------------------------------------------------------- 1 | #---------------------------Dlineage_PopDiv_no.est--------------------------- 2 | // Priors and rules file 3 | // ********************* 4 | 5 | [PARAMETERS] 6 | //#isInt?
#name #dist.#min #max 7 | //all Ns are in number of haploid individuals 8 | 1 pop1 logunif 100 100000 output 9 | 1 pop2 logunif 100 100000 output 10 | 0 RSANC logunif 0.1 100 output 11 | 0 TPROP logunif 0.0001 0.5 output 12 | 13 | [RULES] 14 | 15 | [COMPLEX PARAMETERS] 16 | #---------------------------------- Dlineage_PopDiv_no.tpl ------------------------------- 17 | //Parameters for the coalescence simulation program : fastsimcoal.exe 18 | 2 samples to simulate : 19 | //Population effective sizes (number of genes) 20 | pop1 21 | pop2 22 | //Haploid samples sizes 23 | 40 24 | 40 25 | //Growth rates: negative growth implies population expansion 26 | 0 27 | 0 28 | //Number of migration matrices : 0 implies no migration between demes 29 | 0 30 | //historical event: time, source, sink, migrants, new deme size, new growth rate, migration matrix index 31 | 2 historical event 32 | 10000 1 0 1 RSANC 0 0 33 | 6500 0 1 0 RSANC 0.0002 0 34 | //Number of independent loci [chromosome] 35 | 1 0 36 | //Per chromosome: Number of contiguous linkage Block: a block is a set of contiguous loci 37 | 1 38 | //per Block:data type, number of loci, per generation recombination and mutation rates and optional parameters 39 | FREQ 1 0 6.5e-9 OUTEXP 40 | 41 | #run 42 | ./fsc26 -t Dlineage_PopDiv_diff.tpl -e Dlineage_PopDiv_diff.est -d -0 -n 100000 -L 40 -s 0 -M -q -c80 43 | -d:dsfs 44 | -0:不考虑 SFS 似然计算的单态位点 45 | -C:在似然计算中考虑的最小观察到的 SFS 条目计数(默认值 = 1,但值可以 < 1。例如 0.5) 46 | -n:要执行的模拟次数,也应用于参数估计 47 | -L:在 lhood 最大化期间要执行的循环(ECM 循环)数。 默认为 20 48 | -s:输出 DNA 作为 SNP 数据,并指定最大数量。 要输出的 SNP(使用 0 输出所有 SNP)。 49 | -M:从迭代之间的 SFS 值通过 maxlhood 执行参数估计 50 | -q:quiet 51 | -c:用于参数估计的openMP线程数(默认=1,max=numBatches,使用0让openMP选择最优值) 52 | 53 | -i:参数文件的名字 54 | -E:从参数先验中抽取的次数(可选), 在模板文件中替换列出的参数值 55 | -g:使用基因型数据生成 arlequin 项目 56 | -S:输出整个 DNA 序列,包括单态位点 57 | -I:根据无限位点 (IS) 突变模型生成 DNA 突变 58 | -m:msfs 59 | 60 | Historical events can be used to: 61 | • Change the size of a given population. 62 | • Change the growth rate of a given population. 63 | • Change the migration matrix to be used between populations. 64 | • Move a fraction of the genes of a given population to another population. This amounts to 65 | implementing a (stochastic) admixture or introgression event. 66 | • Move all genes from a population to another population. This amounts to fusing two 67 | populations into one, looking backward in time, or implementing a populations’ fission 68 | looking forward in time. 69 | • One or more of these events at the same time 70 | 71 | -------------------------------------------------------------------------------- /01_Vmap1.1R/45_Admixture.sh: -------------------------------------------------------------------------------- 1 | #按MAF>0.05和缺失率<0.1过滤 2 | plink --vcf test.imputed.vcf --maf 0.05 --geno 0.1 --recode vcf-iid --out test.filter --allow-extra-chr --double-id --autosome-num 42 3 | --keep-only-indels 4 | #对VCF进行LD筛选 5 | plink --vcf test.filter.vcf --indep-pairwise 50 10 0.2 --out test.filterLD --allow-extra-chr --double-id --autosome-num 42 6 | 7 | #提取筛选结果 8 | #plink --vcf test.filter.vcf --make-bed --extract test.filterLD.prune.in --out test.filter.prune.in --double-id --autosome-num 42 9 | plink --vcf test.filter.vcf --extract test.filterLD.prune.in --recode vcf-iid --out test.filter.prune.in --double-id --autosome-num 42 10 | 11 | #转换成structure/admixture格式 12 | plink --bfile test.filter.prune.in --recode structure --out test.filter.prune.in #生成. 
recode.strct_in为structure输入格式 13 | plink --bfile test.filter.prune.in --recode 12 --out test.filter.prune.in --autosome-num 42 #生成.ped为admixture输入格式 14 | plink --bfile test.filter.prune.in --recode vcf --out test.filter.prune.in --autosome-num 42 15 | #admixture 16 | admixture --cv test.filter.ped 1 >>log.txt 17 | admixture --cv test.filter.ped 2 >>log.txt 18 | ....... 19 | admixture --cv test.filter.ped 13 >>log.txt 20 | wait 21 | grep "CV error" log.txt > k_1to13 22 | 23 | 24 | -------------------------------------------------------------------------------- /01_Vmap1.1R/49_Variants_Age.sh: -------------------------------------------------------------------------------- 1 | Working Directory: 2 | xuebo@204:/data2/xuebo/Projects/Speciation/Variant_Age/Chr 3 | xuebo@204:/data2/xuebo/Projects/Speciation/Variant_Age/Lineage 4 | xuebo@204:/data2/xuebo/Projects/Speciation/E6/Landrace_locate_225/Landrace_225_noAM_220_maf001/LD/chr6.lineage_Landrace_225_noAM_220_maf001_LD.vcf 5 | 6 | for i in {1..42} 7 | do 8 | bash 42-21.sh /data2/xuebo/Projects/Speciation/E6/Landrace_locate_225/Landrace_225_noAM_220_maf001/LD/chr${i}.lineage_Landrace_225_noAM_220_maf001_LD.vcf > chr${i}.vcf & 9 | done 10 | vcf-concat chr1.vcf chr2.vcf | bgzip -c > chr1A.vcf.gz 11 | 12 | #step1 转格式(vcf文件染色体号不能有字母,重组率文件必须有chr) 13 | #yafei@204:/data1/home/yafei/Test 14 | plink2 --vcf A_position.vcf.recode.vcf --allow-extra-chr --alt1-allele 'force' A_Alt.Anc.vcf.gz 4 3 '#' --export vcf --out A_Anc --autosome-num 42 15 | plink2 --vcf B_position.vcf.recode.vcf --allow-extra-chr --alt1-allele 'force' B_Alt.Anc.vcf.gz 4 3 '#' --export vcf --out B_Anc --autosome-num 42 16 | plink2 --vcf D_position.vcf.recode.vcf --allow-extra-chr --alt1-allele 'force' D_Alt.Anc.vcf.gz 4 3 '#' --export vcf --out D_Anc --autosome-num 42 17 | #xuebo@204:/data2/xuebo/Projects/Speciation/Variant_Age/Chr 18 | 19 | for i in {1..7} 20 | do 21 | for j in {"A","B","D"} 22 | do 23 | geva_v1beta --vcf ../Chr/chr${i}${j}.vcf.gz --map recombination_rate_geva2.txt --out chr${i}${j}_rec 24 | done 25 | done 26 | 27 | for i in `ls *marker.txt` 28 | do 29 | awk '{print $3}' ${i} | sed '1,2d' |sed '$d' > ${i::-11}.pos & 30 | done 31 | 32 | #step2 运行 33 | for i in {1..7} 34 | do 35 | for j in {"A","B","D"} 36 | do 37 | geva_v1beta -i chr${i}${j}_rec.bin -o ${i}${j}_RUN1 --positions chr${i}${j}_rec.pos --Ne 70000 --mut 6.5e-9 --hmm /data1/home/xuebo/software/geva-master/hmm/hmm_initial_probs.txt /data1/home/xuebo/software/geva-master/hmm/hmm_emission_probs.txt 38 | done 39 | done 40 | 41 | for i in {1..7} 42 | do 43 | for j in {"A","B","D"} 44 | do 45 | Rscript /data1/home/xuebo/software/geva-master/estimate.R ${i}${j}_RUN1.pairs.txt 70000 46 | done 47 | done 48 | 49 | ---------------------------------------------本地画图--------------------------------------------- 50 | xuebo@204:/data2/xuebo/Projects/Speciation/Variant_Age/rec 51 | 52 | setwd("/Users/guoyafei/Documents/01_Migration/02_Environment/06_Allele_Age") 53 | 54 | for i in `ls *rec.marker.txt`; do sed '2d' $i | sed '$d' > ${i::-11}.txt; done 55 | for i in `ls *RUN1.sites2.txt`; do grep "J" $i | sed '1i MarkerID\tClock\tN_Concordant\tN_Discordant\tPostMode' > chr${i::-11}.txt; done 56 | -------------------------------------------------------------------------------- /01_Vmap1.1R/53_37gene_haplotype.r: -------------------------------------------------------------------------------- 1 | #统一设置:注释内容及颜色 2 | library(pheatmap) 3 | require(reshape) 4 | require (rworldmap) 5 | require(rworldxtra) 6 | library(RColorBrewer) 7 | 
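# (added note, inferred from the pheatmap calls below rather than stated in the original)
# The per-gene matrices read in this script appear to be -1/0/1/2 coded genotypes
# (vcftools --012 style: missing, hom-ref, het, hom-alt); the four colours defined
# next are mapped to those codes through legend_breaks = -1:2 and
# legend_labels = c("./.", "0/0", "0/1", "1/1").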
cols <- c("#225EA8","#DEEBF7","#FEB24C","#BD0026") 8 | setwd("/Users/guoyafei/Documents/01_VMap1.1/02_Environment/08_snpEff/VCF_withAn/225_TXT") 9 | #annotation_col <- read.table("/Users/guoyafei/Documents/01_个人项目/02_Migration/02_数据表格/01_Vmap1-1/01_Add_ZNdata/05_Environment/XP-CLR/E6_Landrace_locate_225.txt",header=T,stringsAsFactors = T) 10 | #rownames(annotation_col) = c(1:225) 11 | annotation <- read.table("/Users/guoyafei/Documents/01_VMap1.1/02_Environment/01_RDA_plot/select_taxa4.txt", header=T, stringsAsFactors = F, sep="\t") 12 | sub <- annotation[!is.na(annotation$ID),] 13 | rownames(sub) <- sub$ID 14 | anno <- sub[,7,drop=FALSE] 15 | #-------------------------------------------------------------------------------------------------------------------------------------- 16 | #AB 17 | AB_name <- read.table("/Users/guoyafei/Documents/01_VMap1.1/02_Environment/08_snpEff/VCF_withAn/225_TXT/AB_name.txt",header=F,stringsAsFactors = F) 18 | AB_anno <- anno[which(rownames(anno) %in% AB_name[,1]),1,drop=F] 19 | ann_color = list( 20 | heatmap = c(Wild_emmer = "#8C510A", Domesticated_emmer = "#DFC27D", Freethreshing = "#F6E8C3", EU="#66C2A5", WA= "#FC8D62",CA="#8DA0CB", EA ="#E78AC3",SA="#A6D854",Tibet="#FFD92F")) 21 | path <- "/Users/guoyafei/Documents/01_VMap1.1/02_Environment/08_snpEff/VCF_withAn/225_TXT/AB" ##文件目录 22 | fileNames <- dir(path) ##获取该路径下的文件名 23 | filePath <- sapply(fileNames, function(x){ 24 | paste(path,x,sep='/')}) ##生成读取文件路径 25 | data <- lapply(filePath, function(x){ 26 | read.table(x, header=F,stringsAsFactors = F)}) 27 | AB_file <-read.table("AB_file_name.txt",header=F,stringsAsFactors = F) 28 | AB_tit <- AB_file[,1] 29 | pdf("test.pdf") 30 | for (i in c(1:length(data))){ 31 | all <- as.data.frame(data[[i]]) 32 | colnames(all) <- AB_name[,1] 33 | all2 <- all[, rownames(AB_anno)] 34 | pheatmap(all2, show_rownames=FALSE, show_colnames=FALSE,color = cols, legend_breaks = -1:2, legend_labels = c("./.", "0/0", "0/1", "1/1"), cluster_col = F, cluster_row = FALSE, annotation = AB_anno, annotation_colors = ann_color,annotation_names_col = F,main=AB_tit[i]) 35 | } 36 | dev.off() 37 | 38 | #-------------------------------------------------------------------------------------------------------------------------------------- 39 | #D 40 | ann_color = list( 41 | #Growing_Habit = c(Facultative = "yellow", Spring="orange", Winter="blue"), 42 | heatmap = c(Strangulata = "#8C510A", EU="#66C2A5", WA= "#FC8D62",CA="#8DA0CB", EA ="#E78AC3",SA="#A6D854",Tibet="#FFD92F")) 43 | 44 | path <- "/Users/guoyafei/Documents/01_Migration/02_Environment/08_snpEff/VCF_withAn/225_TXT/D" ##文件目录 45 | fileNames <- dir(path) ##获取该路径下的文件名 46 | filePath <- sapply(fileNames, function(x){ 47 | paste(path,x,sep='/')}) ##生成读取文件路径 48 | data <- lapply(filePath, function(x){ 49 | read.table(x, header=F,stringsAsFactors = F)}) 50 | 51 | D_name <- read.table("/Users/guoyafei/Documents/01_Migration/02_Environment/08_snpEff/VCF_withAn/225_TXT/D_name.txt",header=F,stringsAsFactors = F) 52 | D_anno <- anno[which(rownames(anno) %in% D_name[,1]),1,drop=F] 53 | D_file <-read.table("D_file_name.txt",header=F,stringsAsFactors = F) 54 | D_tit <- D_file[,1] 55 | pdf("D.pdf") 56 | for (i in c(1:length(data))){ 57 | all <- as.data.frame(data[[i]]) 58 | colnames(all) <- D_name[,1] 59 | all <- all[, rownames(D_anno)] 60 | pheatmap(all, show_rownames=FALSE, show_colnames=FALSE,color = cols, legend_breaks = -1:2, legend_labels = c("./.", "0/0", "0/1", "1/1"), cluster_col = F, cluster_row = FALSE, annotation = D_anno, annotation_colors = 
ann_color,annotation_names_col = F,main=D_tit[i]) 61 | } 62 | dev.off() 63 | 64 | #annotation_colors = ann_colors 65 | #接下来可以使用06_Qmatrix_PieMap.r来画在地图上的单倍型分布。 66 | 67 | #plot haplotype heatmap 68 | brewer.pal(9, "YlGnBu")[c(7)] 69 | "#225EA8" 70 | brewer.pal(9, "Blues")[c(2)] 71 | "#DEEBF7" 72 | brewer.pal(9, "YlOrRd")[c(4)] 73 | "#FEB24C" 74 | brewer.pal(9, "YlOrRd")[c(8)] 75 | "#BD0026" 76 | cols <- c("#225EA8","#DEEBF7","#FEB24C","#BD0026") 77 | -------------------------------------------------------------------------------- /01_Vmap1.1R/57_Gene_HaploType_Map.r: -------------------------------------------------------------------------------- 1 | #work directory: 2 | #/Users/guoyafei/Documents/01_Migration/01_BasicStatistic/06_Structure/sample_group.txt 3 | #/data1/home/yafei/003_Project3/bayenv/13pop/candidate_gene 4 | #/data2/yafei/003_Project3/Vmap1.1/E6/Landrace_locate_225 5 | #Rht-B1: 6 | #TraesCS4B02G043100 7 | #chr21 start:30861268 end:30863723 8 | #ppd-D1 9 | #TraesCS2D01G079600 10 | #chr11 start:33952048 end:33956269 11 | cat *cloned.gene | sort | uniq -c | awk '{print $2"\t"$3}' > top5.candidate.txt 12 | awk '{print $2}' top5.candidate.txt > test 13 | grep -f test gene_v1.1_Lulab.gff3 |awk '{print $1"\t"$4"\t"$5"\t"$9}' |awk -F";" '{print $1}' |sed 's/ID=//' > test2 14 | awk 'NR==FNR{a[$2]=$1;b[$2]=$2} NR!=FNR {if($4 in b) {print $0"\t"a[$4]}}' top5.candidate.txt test2 | awk '{print $1"\t"$2-5000"\t"$3+5000"\t"$4"\t"$5}' |sed '1i Chr\tstart-5k\tend+5k\tID\tName' > gene_region.txt 15 | 16 | cat gene_region.txt |while read chr from to ID Name 17 | do 18 | if [ $chr != "Chr" ];then 19 | bcftools filter /data2/yafei/003_Project3/Vmap1.1/E6/Landrace_locate_225/chr${chr}.E6_Landrace_locate.vcf.gz --regions ${chr}:${from}-${to} > ${ID}-${Name}.vcf 20 | fi 21 | done 22 | for i in `ls *vcf` 23 | do 24 | 25 | vcftools --vcf $i --012 26 | paste out.012.pos <(cat out.012 | datamash transpose | sed '1d' ) > ${i::-4}.vcf.txt 27 | cat out.012.indv | datamash transpose | awk '{print "File\tChr\tPos\t"$0}' > indi.txt 28 | rm out.012* 29 | done 30 | for i in `ls *vcf.txt` 31 | do 32 | awk '{print FILENAME"\t"$0}' $i | sed 's/.vcf.txt//g' > ${i::-4}.vcf.txt2 33 | done 34 | cat indi.txt *vcf.txt2 > all.pos.txt 35 | rm TraesCS* 36 | 37 | library(reshape) 38 | library(ggplot2) 39 | library(RColorBrewer) 40 | require (rworldmap) 41 | setwd("/Users/guoyafei/Documents/01_Migration/01_BasicStatistic/06_Structure/") 42 | data <- read.table("/Users/guoyafei/Documents/01_Migration/02_Environment/01_RDA_plot/225.taxa.txt", header=T,row.names = 1, sep="\t", stringsAsFactors = F) 43 | taxa <- read.table("/Users/guoyafei/Documents/01_Migration/01_BasicStatistic/06_Structure/20210829/Cluster_Location/cluster_70.txt",header=T,row.names = 1, stringsAsFactors = F) 44 | for(i in names(table(data$File))){ 45 | sub <- data[which(data$File==i),] 46 | file <- paste(i,".pdf",sep="") 47 | pdf(file) 48 | for( j in c(1:dim(sub)[1])){ 49 | tit <- sub[j,3] 50 | sample <- sub[j,4:228] 51 | loc <- taxa[names(sample),4:5] 52 | loc$type <- as.numeric(sample[1,]) 53 | loc$value <- 1 54 | wheat_reshape <- cast(loc,Latitude_70+Longitude_70~type) 55 | wheat_reshape2 <- as.data.frame(wheat_reshape) 56 | name <- names(table(loc$type)) 57 | if((j-1)%%4 !=0){ 58 | mapPies(wheat_reshape2,xlim=c(-10,130),ylim=c(10,70),nameX="Longitude_70",nameY="Latitude_70",nameZs=name,symbolSize=1.5, 59 | zColours=brewer.pal(8, "Set3")[c(2,3,4,5)],barOrient='vert',oceanCol="white",landCol=brewer.pal(9,"Pastel1")[9],main="tit") 60 | 
legend(25,95,box.lty=0,bg="transparent","108 landrace GrowHabbit", col="black") 61 | }else{ 62 | par(mfrow=c(2,2),oma=c(1,1,1,1), mar=c(0,1,0,1), cex=1) 63 | mapPies(wheat_reshape2,xlim=c(-10,130),ylim=c(10,70),nameX="Longitude_70",nameY="Latitude_70",nameZs=name,symbolSize=1.5, 64 | zColours=brewer.pal(12, "Set3")[c(2,3,4,5)],barOrient='vert',oceanCol="white",landCol=brewer.pal(9,"Pastel1")[9], main="tit") 65 | legend(25,95,box.lty=0,bg="transparent",tit, col="black") 66 | } 67 | } 68 | dev.off() 69 | } 70 | -------------------------------------------------------------------------------- /01_Vmap1.1R/59_Depth.R: -------------------------------------------------------------------------------- 1 | #depth.r 2 | library(cowplot) 3 | library(ggExtra) 4 | library(forcats) 5 | library(ggplot2) 6 | library(RColorBrewer) 7 | library(aplot) 8 | setwd("/Users/guoyafei/Desktop/NP/reviewer/depth") 9 | # method 1 ------------ 10 | dataFile <- c("tetra_depth.txt.gz") 11 | df1 <- read.delim(dataFile) %>% mutate(GenomeType = "AABB") 12 | dataFile <- c("hexa_depth.txt.gz") 13 | df2 <- read.delim(dataFile)%>% mutate(GenomeType = "AABBDD") 14 | dataFile <- c("diploid_depth.txt.gz") 15 | df3 <- read.delim(dataFile)%>% mutate(GenomeType = "DD") 16 | 17 | df <- rbind(df1,df2,df3) %>% 18 | group_by(GenomeType) %>% 19 | slice_sample(.,n = 3000) %>% 20 | ungroup() 21 | 22 | data <- read.table("all_100k.depth",header=F,stringsAsFactors = F) 23 | colnames(data) <- c("Depth","SD") 24 | 25 | colB <- c('#ffd702','#fc6e6e',"#87cef9") 26 | df$GenomeType <- factor(df$GenomeType,levels = c("AABB","AABBDD","DD")) 27 | 28 | p <- ggplot(data,aes(x=AverageDepth,y=SD))+ 29 | geom_point(alpha=0.5,shape=1,size=0.8)+ 30 | xlab("Mean of depth")+ylab("SD")+ 31 | #scale_color_manual(values = colB)+ 32 | xlim(0,15)+ylim(0,10)+ 33 | theme( 34 | panel.background = element_blank(), 35 | panel.border = element_rect(colour = "black", fill=NA, size=0.7) 36 | ,legend.position = 'none' 37 | ,text = element_text(size = 14)) 38 | 39 | p 40 | p2 <- ggMarginal(p, type="density",groupColour = F, groupFill = F) 41 | p2 42 | ggsave(p2,filename = "~/Documents/test.pdf",height = 4,width = 4) 43 | 44 | # method 2 ------------ 45 | data <- read.table("all_100k.depth",header=F,stringsAsFactors = F) 46 | colnames(data) <- c("Depth","SD") 47 | data <- data[which(data$Depth > 0 ),] 48 | data <- data[which(data$Depth < 15 & data$SD < 10),] 49 | data <- data[sample(1:85255, 30000),] 50 | p1<-ggplot(data,aes(x = Depth, y = SD))+ 51 | geom_point(alpha= 0.1)+ 52 | scale_color_brewer(palette = "Set2")+ 53 | #scale_color_manual(values=c("green","blue","grey"))+ 54 | theme_bw() 55 | #geom_hline(yintercept = 3.86971,lty="dashed")+ 56 | #geom_hline(yintercept = 6.15844,lty="dashed")+ 57 | #geom_vline(xintercept = 3.91843,lty="dashed")+ 58 | #geom_vline(xintercept = 9.36970,lty="dashed") 59 | p2<-ggplot(data,aes(Depth))+ 60 | geom_density(fill="grey",alpha=0.5)+ 61 | scale_y_continuous(expand = c(0,0))+ 62 | theme_minimal()+ 63 | theme(axis.title = element_blank(), 64 | axis.text = element_blank(), 65 | axis.ticks = element_blank()) 66 | p3<-ggplot(data,aes(SD))+ 67 | geom_density(fill="grey",alpha=0.5)+ 68 | scale_y_continuous(expand = c(0,0))+ 69 | theme_minimal()+ 70 | theme(axis.title = element_blank(), 71 | axis.text = element_blank(), 72 | axis.ticks = element_blank())+ 73 | coord_flip() 74 | #累计分布函数 75 | p4<-ggplot(data, aes(Depth)) + 76 | theme_minimal()+ 77 | theme(axis.title = element_blank(), 78 | axis.text = element_blank(), 79 | axis.ticks = 
element_blank())+ 80 | scale_color_brewer(palette = "Set2")+ 81 | stat_ecdf()+ 82 | scale_y_reverse() 83 | p5<-ggplot(data, aes(SD)) + 84 | scale_color_brewer(palette = "Set2")+ 85 | theme_minimal()+ 86 | theme(axis.title = element_blank(), 87 | axis.text = element_blank(), 88 | axis.ticks = element_blank())+ 89 | stat_ecdf()+ 90 | coord_flip()+ 91 | scale_y_reverse() 92 | p1%>% 93 | insert_top(p2,height = 0.3)%>% 94 | insert_right(p3,0.3) 95 | 96 | -------------------------------------------------------------------------------- /01_Vmap1.1R/60_response.R: -------------------------------------------------------------------------------- 1 | # Correlation coefficients 2 | library(pheatmap) 3 | require(reshape) 4 | require (rworldmap) 5 | require(rworldxtra) 6 | library(RColorBrewer) 7 | setwd("/Users/guoyafei/Desktop/btr1/ibs") 8 | input <- c("btr1-A.10k.all.ibs","btr1-A.1M.all.ibs","btr1-A.200k.all.ibs","btr1-A.2k.all.ibs","btr1-A.500k.all.ibs","btr1-B.10k.all.ibs","btr1-B.1M.all.ibs","btr1-B.200k.all.ibs","btr1-B.2k.all.ibs","btr1-B.500k.all.ibs") 9 | output <- c("btr1-A.10k.all.ibs.pdf","btr1-A.1M.all.ibs.pdf","btr1-A.200k.all.ibs.pdf","btr1-A.2k.all.ibs.pdf","btr1-A.500k.all.ibs.pdf","btr1-B.10k.all.ibs.pdf","btr1-B.1M.all.ibs.pdf","btr1-B.200k.all.ibs.pdf","btr1-B.2k.all.ibs.pdf","btr1-B.500k.all.ibs.pdf") 10 | 11 | for(i in c(1:length(input))){ 12 | test <- read.table(input[i],row.names = 1, header = T) 13 | pdf(output[i],width=10,height=10) # one heatmap per hclust linkage method, labelled by main 14 | pheatmap(test, show_rownames=T, show_colnames=T, cluster_col = T, clustering_method="ward.D2",cluster_row = T,main="ward.D2") 15 | pheatmap(test, show_rownames=T, show_colnames=T, cluster_col = T, clustering_method="complete",cluster_row = T,main="complete") 16 | pheatmap(test, show_rownames=T, show_colnames=T, cluster_col = T, clustering_method="average",cluster_row = T,main="average") 17 | pheatmap(test, show_rownames=T, show_colnames=T, cluster_col = T, clustering_method="mcquitty",cluster_row = T,main="mcquitty") 18 | pheatmap(test, show_rownames=T, show_colnames=T, cluster_col = T, clustering_method="median",cluster_row = T,main="median") 19 | pheatmap(test, show_rownames=T, show_colnames=T, cluster_col = T, clustering_method="centroid",cluster_row = T,main="centroid") 20 | pheatmap(test, show_rownames=T, show_colnames=T, cluster_col = T, clustering_method="single",cluster_row = T,main="single") 21 | dev.off() 22 | } 23 | -------------------------------------------------------------------------------- /01_Vmap1.1R/Scripts.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /01_Vmap1.1R/assets/16442473466436.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yafei-Guo-coder/DumpNice/183d85aba75932aeacf7484386eb0cae484c7d9b/01_Vmap1.1R/assets/16442473466436.jpg -------------------------------------------------------------------------------- /02_Vmap3R/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yafei-Guo-coder/DumpNice/183d85aba75932aeacf7484386eb0cae484c7d9b/02_Vmap3R/.DS_Store
-------------------------------------------------------------------------------- /02_Vmap3R/01_Tree_Anno.r: -------------------------------------------------------------------------------- 1 | library(ggmap)#Load libraries 2 | library(ggplot2) 3 | hpars<-read.table("https://sites.google.com/ 4 | site/arunsethuraman1/teaching/hpars.dat?revision=1") 5 | 6 | par(mfrow = c(2, 2)) 7 | pie(rep(1,4),labels=c("AF","AM","AS","EU"), col = c("#8DD3C7","#FFFFB3","#BEBADA","#FB8072"), main = "Branch-Region") 8 | pie(rep(1,7),labels=c("Cultivar","Domesticated_emmer","Free_threshing_tetraploids","Landrace","OtherHexaploid","OtherTetraploid","Wild_emmer"), col = c("#836FFF","#87CEFF","#8B636C","#8DD3C7","#9B30FF","#B3DE69","#B452CD"), main = "Label-Subspecies") 9 | pie(rep(1,5),labels=c("Kong","LuLab","WEGA","ZKS","ZN"), col = c("#8DD3C7","#FB8072","#B3DE69","#8B636C","#80B1D3"), main = "Branch-Source") 10 | pie(rep(1,4),labels=c("ExclusionHexaploid","ExclusionTetraploid","Hexaploid","Tetraploid"), col = c("#8DD3C7","#FFFFB3","#BEBADA","#FB8072"), main = "Label-In-Exclu") 11 | -------------------------------------------------------------------------------- /02_Vmap3R/02_Enrich.r: -------------------------------------------------------------------------------- 1 | library(ggplot2) 2 | library(RColorBrewer) 3 | display.brewer.all() 4 | col1 <- brewer.pal(n = 8, name = "YlOrBr")[c(2,5)] 5 | col2 <- brewer.pal(n = 8, name = "YlGnBu")[c(2,5)] 6 | col3 <- brewer.pal(n = 8, name = "YlGn")[c(2,5)] 7 | col4 <- brewer.pal(n = 8, name = "Purples")[c(2,5)] 8 | col5 <- brewer.pal(n = 8, name = "Greys")[c(2,5)] 9 | 10 | #setwd("/Users/guoyafei/Documents/Lulab/Project-2-Migration/anno_gene") 11 | #nameA <- c("Domesticated_einkorn", "Domesticated_emmer","Durum","EA","EU","Indian_dwarf","Khorasan_wheat","Landrace","Macha","Persian_wheat","Polish_wheat","Rivet_wheat","SCA","Spelt","Tibetan_semi_wild","Urartu","Vavilovii","WA","Wild_Einkorn","Wild_emmer","Xinjiang_wheat") 12 | #nameB <- c("Domesticated_emmer","Durum","EA","EU","Khorasan_wheat", "Landrace","Macha","Persian_wheat","Polish_wheat","Rivet_wheat","SCA","Spelt","Speltoides","Vavilovii","WA","Wild_emmer","Xinjiang_wheat","Yunan_wheat") 13 | #nameD <- c("Anathera","Club_wheat","EA","EU","Indian_dwarf","Landrace","Macha","Meyeri","SCA","Spelt","Strangulata","Tibetan_semi_wild","Vavilovii","WA","Xinjiang_wheat","Yunan_wheat") 14 | 15 | path <- "//Users/guoyafei/Documents/01_个人项目/02_Migration/02_数据表格/01_Vmap1-1/01_Add_ZNdata/05_Environment/XP-CLR/Go/TXT" 16 | fileNames <- dir(path) 17 | filePath <- sapply(fileNames, function(x){ 18 | paste(path,x,sep='/')}) 19 | data <- lapply(filePath, function(x){ 20 | read.table(x, header=T, sep = "\t", stringsAsFactors=F)}) 21 | #for(i in 1:length(data)){ 22 | # data[[i]]$Name <- nameB[i] 23 | #} 24 | #all <- data.frame() 25 | #for(i in 1:length(data)){ 26 | # all <- rbind(all, data[[i]]) 27 | #} 28 | #plot_list = list() 29 | #for(i in 1:16){ 30 | p <- ggplot(data=data[[6]])+ 31 | geom_bar(aes(x= Description, y=Number.in.input.list, fill=(FDR)), stat='identity') + 32 | coord_flip() + 33 | #facet_grid(.~Name,scales="free") + 34 | #facet_wrap(~Name,ncol = 1)+ 35 | scale_fill_gradient(expression(FDR),low=col1[2], high = col1[1]) + 36 | ylab("Gene number") + 37 | xlab("GO term description") + 38 | #expand_limits(y=c(0,8))+ 39 | #ylim(0,100)+ 40 | theme( 41 | axis.text.x=element_text(color="black",size=rel(0.8)), 42 | axis.text.y=element_text(color="black", size=rel(0.3)), 43 | axis.title.x = element_text(color="black", size=rel(5)), 44 
| axis.title.y = element_blank(), 45 | legend.text=element_text(color="black",size=rel(0.2)), 46 | legend.title = element_text(color="black",size=rel(0.7)) 47 | #legend.position=c(0,1),legend.justification=c(-1,0) 48 | #legend.position="top", 49 | )+ 50 | # scale_x_discrete(limits= c("Domesticated_einkorn", "Domesticated_emmer","Durum","EA","EU","Indian_dwarf","Khorasan_wheat","Landrace","Macha","Persian_wheat","Polish_wheat","Rivet_wheat","SCA","Spelt","Tibetan_semi_wild","Urartu","Vavilovii","WA","Wild_Einkorn","Wild_emmer","Xinjiang_wheat"))+ 51 | theme_bw()+theme(panel.grid=element_blank(),panel.border=element_blank(),axis.line=element_line(size=1,colour="black")) 52 | #plot_list[[i]] = p 53 | #} 54 | for (i in 1:16) { 55 | file_name = paste("D",i,".pdf",sep="") 56 | pdf(file_name,height = 12,width = 9) 57 | print(plot_list[[i]]) 58 | dev.off() 59 | } 60 | 61 | -------------------------------------------------------------------------------- /02_Vmap3R/04_IBS.r: -------------------------------------------------------------------------------- 1 | #Required----- 2 | library(RColorBrewer) 3 | #读取Subspecies文件,共12个。 4 | path <- "/Users/guoyafei/Documents/01_个人项目/04_VmapIII/12_Vmap3Test/05_VcfCheck/04_TaxaGroup/03_Subspecies" 5 | fileNames <- dir(path) 6 | filePath <- sapply(fileNames, function(x){ 7 | paste(path,x,sep='/')}) 8 | data <- lapply(filePath, function(x){ 9 | read.table(x, header=T,stringsAsFactors=F)}) 10 | length(data) 11 | #A_IBS---- 12 | #读取ibs文件,并且计算每个subspecies与CS的IBS平均值。 13 | spName <- c("CS", "Cultivar", "Domesticated_emmer", "Free_threshing_tetraploid", "Landrace", "Other_hexaploid", "Other_tetraploid", "Stranglata", "Tibetan_semi_wild", "Wild_emmer") 14 | A <- read.table("/Users/guoyafei/Documents/01_个人项目/04_VmapIII/12_Vmap3Test/07_Test0720/depth_statics/IBS/1A.IBS.txt",header=T,stringsAsFactors=F) 15 | #dim(A) 16 | #1355 1356 17 | A$Lineage <- NA 18 | rownames(A) <- A$Dxy 19 | #给每一行赋subspecies的值 20 | 21 | for(i in 1:length(data)){ 22 | A[which(A$Dxy %in% data[[i]][,1]),1357] <- spName[i] 23 | } 24 | 25 | #提取CS的两列 26 | subA <- cbind(A[,which(colnames(A) %in% data[[1]][,1])],A[,1357]) 27 | subA$lineage <- "A" 28 | colnames(subA) <- c("IBS1","IBS2","subspecies","lineage") 29 | #B_IBS---- 30 | #读取ibs文件,并且计算每个subspecies与CS的IBS平均值。 31 | spName <- c("CS", "Cultivar", "Domesticated_emmer", "Free_threshing_tetraploid", "Landrace", "Other_hexaploid", "Other_tetraploid", "Stranglata", "Tibetan_semi_wild", "Wild_emmer") 32 | B <- read.table("/Users/guoyafei/Documents/01_个人项目/04_VmapIII/12_Vmap3Test/07_Test0720/depth_statics/IBS/1B.IBS.txt",header=T,stringsAsFactors=F) 33 | #dim(B) 34 | #1355 1356 35 | B$Lineage <- NA 36 | rownames(B) <- B$Dxy 37 | #给每一行赋subspecies的值 38 | for(i in 1:length(data)){ 39 | B[which(B$Dxy %in% data[[i]][,1]),1357] <- spName[i] 40 | } 41 | #提取CS的两列 42 | subB <- cbind(B[,which(colnames(B) %in% data[[1]][,1])],B[,1357]) 43 | subB$lineage <- "B" 44 | colnames(subB) <- c("IBS1","IBS2","subspecies","lineage") 45 | #D_IBS---- 46 | #读取ibs文件,并且计算每个subspecies与CS的IBS平均值。 47 | spName <- c("CS", "Cultivar", "Domesticated_emmer", "Free_threshing_tetraploid", "Landrace", "Other_hexaploid", "Other_tetraploid", "Stranglata", "Tibetan_semi_wild", "Wild_emmer") 48 | D <- read.table("/Users/guoyafei/Documents/01_个人项目/04_VmapIII/12_Vmap3Test/07_Test0720/depth_statics/IBS/1D.IBS.txt",header=T,stringsAsFactors=F) 49 | #dim(D) 50 | #1334 1336 51 | D$Lineage <- NA 52 | rownames(D) <- D$Dxy 53 | #给每一行赋subspecies的值 54 | for(i in 1:length(data)){ 55 | D[which(D$Dxy %in% 
data[[i]][,1]),1337] <- spName[i] 56 | } 57 | #提取CS的两列 58 | subD <- cbind(D[,which(colnames(D) %in% data[[1]][,1])],D[,1337]) 59 | subD$lineage <- "D" 60 | colnames(subD) <- c("IBS1","IBS2","subspecies","lineage") 61 | 62 | #画每组比较的boxplot图 63 | all <- rbind(subA,subB,subD) 64 | p <- ggplot(all, aes(subspecies, IBS1)) + 65 | geom_boxplot(outlier.shape = NA, outlier.colour = NA, aes(fill=factor(lineage))) + 66 | scale_fill_brewer(palette = "Accent") + 67 | guides(fill=guide_legend(titlec = NULL)) + 68 | scale_x_discrete(limits = c("Wild_emmer","Domesticated_emmer", "Free_threshing_tetraploid", "Other_tetraploid", "Landrace", "Cultivar", "Other_hexaploid", "Tibetan_semi_wild", "Stranglata"))+ 69 | ylab("PI") + 70 | xlab("Lineages") + 71 | scale_y_continuous(limits=c(0,0.25)) + 72 | theme_classic() + 73 | theme(axis.text.x = element_text(size=12, angle = 30, hjust = 0.5, vjust = 0.5), axis.text.y = element_text(size=15)) 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /02_Vmap3R/06_Map2.r: -------------------------------------------------------------------------------- 1 | #library(ggplot2) 2 | library(ggmap) 3 | #library(sp) 4 | #library(maptools) 5 | #library(maps) 6 | #library(psych) 7 | taxa <- read.table("/Users/guoyafei/Documents/01_个人项目/04_VmapIII/12_Vmap3Test/05_VcfCheck/taxa_location.txt", header=T, stringsAsFactors = F) 8 | taxa_AB <- taxa[which(taxa$Ploidy=="AABB"),] 9 | taxa_ABD <- taxa[which(taxa$Ploidy=="AABBDD"),] 10 | taxa_D <- taxa[which(taxa$Ploidy=="DD"),] 11 | 12 | #绘图 13 | mp<-NULL 14 | mapworld<-borders("world",colour = "gray70",fill="white") 15 | mp<-ggplot()+mapworld+ylim(-90,90)+theme_classic() 16 | 17 | #AABBDD---- 18 | mp2<-mp+geom_point(aes(x=taxa_ABD$Longitude, y=taxa_ABD$Latitude, color=taxa_ABD$TreeValidatedGroupbySubspecies),size=1.5)+scale_size(range=c(1,1))+ theme_classic()+ 19 | theme(axis.text.x = element_text(size = 20),axis.title.x = element_text(size = 20),axis.text.y = element_text(size = 20),axis.title.y = element_text(size = 20))+ 20 | #scale_fill_manual(values=c("#97FFFF", "#FFD700", "#FF6347", "#8470FF","#D8BFD8"), 21 | # breaks=c("AA", "SS", "DD", "AABB","AABBDD"), 22 | # labels=c("AA", "SS", "DD", "AABB","AABBDD"))+ 23 | theme(axis.text = element_blank()) + ## 删去所有刻度标签 24 | theme(axis.ticks = element_blank()) + 25 | theme(panel.grid =element_blank()) + 26 | theme(axis.ticks.y = element_blank()) 27 | 28 | #AABB---- 29 | mp2<-mp+geom_point(aes(x=taxa_AB$Longitude, y=taxa_AB$Latitude, color=taxa_AB$TreeValidatedGroupbySubspecies),size=1.5)+scale_size(range=c(1,1))+ theme_classic()+ 30 | theme(axis.text.x = element_text(size = 20),axis.title.x = element_text(size = 20),axis.text.y = element_text(size = 20),axis.title.y = element_text(size = 20))+ 31 | #scale_fill_manual(values=c("#97FFFF", "#FFD700", "#FF6347", "#8470FF","#D8BFD8"), 32 | # breaks=c("AA", "SS", "DD", "AABB","AABBDD"), 33 | # labels=c("AA", "SS", "DD", "AABB","AABBDD"))+ 34 | theme(axis.text = element_blank()) + ## 删去所有刻度标签 35 | theme(axis.ticks = element_blank()) + 36 | theme(panel.grid =element_blank()) + 37 | theme(axis.ticks.y = element_blank()) 38 | 39 | #DD---- 40 | mp2<-mp+geom_point(aes(x=taxa_D$Longitude, y=taxa_D$Latitude, color=taxa_D$TreeValidatedGroupbySubspecies),size=1.5)+scale_size(range=c(1,1))+ theme_classic()+ 41 | theme(axis.text.x = element_text(size = 20),axis.title.x = element_text(size = 20),axis.text.y = element_text(size = 20),axis.title.y = element_text(size = 20))+ 42 | #scale_fill_manual(values=c("#97FFFF", 
"#FFD700", "#FF6347", "#8470FF","#D8BFD8"), 43 | # breaks=c("AA", "SS", "DD", "AABB","AABBDD"), 44 | # labels=c("AA", "SS", "DD", "AABB","AABBDD"))+ 45 | theme(axis.text = element_blank()) + ## 删去所有刻度标签 46 | theme(axis.ticks = element_blank()) + 47 | theme(panel.grid =element_blank()) + 48 | theme(axis.ticks.y = element_blank()) 49 | #计算各位点的点的数量并标记---- 50 | pos_count <- read.table("/Users/guoyafei/Documents/01_个人项目/04_VmapIII/12_Vmap3Test/05_VcfCheck/pos_count.txt",header=T,stringsAsFactors = F) 51 | mp2<-mp+geom_point(aes(x=pos_count$Longitude, y=pos_count$Latitude),size=2)+scale_size(range=c(1,1))+ theme_classic()+ 52 | theme(axis.text.x = element_text(size = 20),axis.title.x = element_text(size = 20),axis.text.y = element_text(size = 20),axis.title.y = element_text(size = 20))+ 53 | #scale_fill_manual(values=c("#97FFFF", "#FFD700", "#FF6347", "#8470FF","#D8BFD8"), 54 | # breaks=c("AA", "SS", "DD", "AABB","AABBDD"), 55 | # labels=c("AA", "SS", "DD", "AABB","AABBDD"))+ 56 | theme(axis.text = element_blank()) + ## 删去所有刻度标签 57 | theme(axis.ticks = element_blank()) + 58 | theme(panel.grid =element_blank()) + 59 | theme(axis.ticks.y = element_blank()) 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /02_Vmap3R/07_Nucdiff.sh: -------------------------------------------------------------------------------- 1 | #安装mumer,biopython packages 2 | nucdiff Chinese_Spring_chr1A.fa Jagger_chr1A.fa ./Out my_prefix CS_Jagger 3 | #无结果 4 | #切断query序列,逢N就断开,再比对 5 | nohup cat jagger_chr1A.fa | seqkit fx2tab | cut -f 2 | sed -r 's/n+/\n/gi' | cat -n | seqkit tab2fx | seqkit replace -p "(.+)" -r "Contig{nr}" > jagger_chr1A_contig.fa & 6 | nucdiff Chinese_Spring_chr1A.fa jagger_chr1A_contig.fa ./Out_contig my_prefix CS_Jagger_contig 7 | -------------------------------------------------------------------------------- /02_Vmap3R/09_PCA2.r: -------------------------------------------------------------------------------- 1 | install.packages("gdata") 2 | library(cinaR) 3 | library(ggplot2) 4 | library(RColorBrewer) 5 | library(aplot) 6 | library(grid) 7 | library(gridExtra) 8 | display.brewer.all() 9 | col <- brewer.pal(n = 8, name = "Spectral") 10 | setwd("/Users/guoyafei/Documents/02_VmapIII/04_Statistics/04_Structure") 11 | eigvec<- read.table("/Users/guoyafei/Documents/02_VmapIII/04_Statistics/04_Structure/ABlineage.maf0.05.5k.eigenvec",header=T,stringsAsFactors = F) 12 | eigval <- read.table("/Users/guoyafei/Documents/02_VmapIII/04_Statistics/04_Structure/ABlineage.maf0.05.5k.eigenval",header=F,stringsAsFactors = F) 13 | mds <- read.table("/Users/guoyafei/Documents/02_VmapIII/04_Statistics/04_Structure/ABlineage.maf0.05.60k.mds",header=T,stringsAsFactors = F) 14 | popinfo <- "/Users/guoyafei/Documents/02_VmapIII/01_表格/V.2/Lulab_germplasm_Info.xlsx" 15 | poptable <- read.xls(popinfo, sheet=1, verbose=FALSE, fill=NA) 16 | popAB <- poptable[which(poptable$GenomeNAtype!="DD"),c(1,2,3,12,13,14,18,20)] 17 | colnames(popAB)[1] <- "FID" 18 | all <- merge(mds,popAB,by="FID") 19 | #PC1:19.03% PC2:11.18% 20 | #PC1:19.03% PC2:11.18% 21 | #colnames(popAB) <- c("order","exp_id","vcf_id","group","color","pch","group2") 22 | #source("/Users/guoyafei/Documents/01_个人项目/04_VmapIII/16_分群/pca.plot2d.r") 23 | #pca_plot(eigenvector = eigvec, eigenvalue = eigval, 24 | # group = popinfo, key = key, outdir = od, 25 | # shape = T, shapes = pop$group2, border = T, border_size = 2.5, 26 | # line0 = T, line0_size = 1) 27 | 
p<-ggplot(all,aes(x=C1,y=C2,shape=as.factor(all$PCA_shape),color=as.factor(all$PCA_color))) 28 | p<-p+geom_point()+ 29 | scale_color_brewer(palette = "Accent")+ 30 | scale_shape_manual(values = c(1:11))+ 31 | theme_classic()+ 32 | theme(panel.grid = element_blank(), panel.background = element_rect(color = 'black', fill = 'transparent'), plot.title = element_text(hjust = 0.5), legend.key = element_rect(fill = 'transparent')) + 33 | labs(x = 'PCA 1 (19.03%)', y = 'PCA 2 (11.18%)') + 34 | theme_bw() + 35 | theme( 36 | #legend.position="none", 37 | #panel.border = element_blank(), 38 | axis.line.y = element_line(), 39 | axis.line.x = element_line(), 40 | panel.grid.major.x = element_blank(), 41 | panel.grid.minor.x = element_blank(), 42 | axis.text.x=element_text(size=15), 43 | axis.text.y=element_text(size=15), 44 | axis.title.y=element_text(size = 15), 45 | axis.title.x=element_text(size = 15), 46 | ) 47 | site <- read.table("/Users/guoyafei/Documents/02_VmapIII/04_Statistics/04_Structure/AB.maf0.05_siteQCfile.txt",header=T,stringsAsFactors = F) 48 | taxa <- read.table("/Users/guoyafei/Documents/02_VmapIII/04_Statistics/04_Structure/AB.maf0.05_taxaQCfile.txt",header=T,stringsAsFactors = F) 49 | p0 <- ggplot(data=site,aes(x=MissingRate)) + 50 | geom_histogram(binwidth=0.01,fill="#69b3a2", 51 | color="#e9ecef", alpha=0.9)+ 52 | theme_bw()+ 53 | labs(x="MissingRate",y="SNP Count")+ 54 | xlim(0,0.2)+ 55 | theme( 56 | #legend.position="none", 57 | #panel.border = element_blank(), 58 | axis.line.y = element_line(), 59 | axis.line.x = element_line(), 60 | panel.grid.major.x = element_blank(), 61 | panel.grid.minor.x = element_blank(), 62 | axis.text.x=element_text(size=15), 63 | axis.text.y=element_text(size=15), 64 | axis.title.y=element_text(size = 15), 65 | axis.title.x=element_text(size = 15), 66 | 67 | ) 68 | p0 69 | -------------------------------------------------------------------------------- /02_Vmap3R/10_Maf-Dis.r: -------------------------------------------------------------------------------- 1 | library(ggplot2) 2 | setwd("/Users/guoyafei/Documents/01_个人项目/04_VmapIII/12_Test_chr001") 3 | data <- read.table("R2-Maf-Dis.txt",header=T,stringsAsFactors = F) 4 | library(reshape) 5 | data$R_bin <- NA 6 | data$Maf_bin <- NA 7 | data$Dis_bin <- NA 8 | 9 | data[which(data$R.2 >=0 & data$R.2 < 0.5),4] <- "0<=r2<0.5" 10 | data[which(data$R.2 >=0.5 & data$R.2 < 0.75),4] <- "0.5<=r2<0.75" 11 | data[which(data$R.2 >=0.75 & data$R.2 < 1),4] <- "0.75<=r2<1" 12 | data[which(data$R.2 == 1),4] <- "r2=1" 13 | 14 | data[which(data$SNP_A_MAF >=0 & data$SNP_A_MAF < 0.05),5] <- "0-0.05" 15 | data[which(data$SNP_A_MAF >=0.05 & data$SNP_A_MAF < 0.1),5] <- "0.05-0.1" 16 | data[which(data$SNP_A_MAF >=0.1 & data$SNP_A_MAF < 0.15),5] <- "0.1-0.15" 17 | data[which(data$SNP_A_MAF >=0.15 & data$SNP_A_MAF < 0.2),5] <- "0.15-0.2" 18 | data[which(data$SNP_A_MAF >=0.2 & data$SNP_A_MAF < 0.25),5] <- "0.2-0.25" 19 | data[which(data$SNP_A_MAF >=0.25 & data$SNP_A_MAF < 0.3),5] <- "0.25-0.3" 20 | data[which(data$SNP_A_MAF >=0.3 & data$SNP_A_MAF < 0.35),5] <- "0.3-0.35" 21 | data[which(data$SNP_A_MAF >=0.35 & data$SNP_A_MAF < 0.4),5] <- "0.35-0.4" 22 | data[which(data$SNP_A_MAF >=0.4 & data$SNP_A_MAF < 0.45),5] <- "0.4-0.45" 23 | data[which(data$SNP_A_MAF >=0.45 & data$SNP_A_MAF <= 0.5),5] <- "0.45-0.5" 24 | 25 | data[which(data$Distance >=0 & data$Distance < 10),6] <- "0-10" 26 | data[which(data$Distance >=10 & data$Distance < 20),6] <- "10-20" 27 | data[which(data$Distance >=20 & data$Distance < 30),6] <- "20-30" 28 | 
data[which(data$Distance >=30 & data$Distance < 40),6] <- "30-40" 29 | data[which(data$Distance >=40 & data$Distance < 50),6] <- "40-50" 30 | data[which(data$Distance >=50 & data$Distance < 60),6] <- "50-60" 31 | data[which(data$Distance >=60 & data$Distance < 70),6] <- "60-70" 32 | data[which(data$Distance >=70 & data$Distance < 80),6] <- "70-80" 33 | data[which(data$Distance >=80 & data$Distance < 90),6] <- "80-90" 34 | data[which(data$Distance >=90 & data$Distance <= 100),6] <- "90-100" 35 | 36 | R <- data[,c(4,5)] 37 | R$num <- 1 38 | md <- melt(R,id=(c("Maf_bin","R_bin"))) 39 | md2 <- rbind(bin1[1:2000,],bin2[1:2000,],bin3[1:2000,],bin4[1:2000,],bin5[1:2000,],bin6[1:2000,],bin7,bin8,bin9) 40 | newdata <- cast(md2,Maf_bin+R_bin~value) 41 | colnames(newdata)[3] <- "num" 42 | 43 | 44 | ggplot(data = newdata, mapping = aes(x = factor(Maf_bin), y = newdata$num, fill = newdata$R_bin)) + geom_bar(stat = 'identity', position = 'fill')+ 45 | theme_classic() + 46 | labs(x = "Maf", y = "Propotion of SNPs(%)", title = "Allele frequency and R2 for the 10M regions with same snp numbers") + 47 | theme(legend.text = element_text(size = 14),legend.title=element_blank(),plot.title = element_text(hjust = 0.5,size=20),axis.title = element_text(size = 20),axis.text.x = element_text(size = 15,vjust = 0.5, hjust = 0.5,angle = 45),axis.text.y = element_text(size = 15)) 48 | 49 | 50 | D <- data[!is.na(data$Dis_bin),c(5,6)] 51 | D$num <- 1 52 | md <- melt(D,id=(c("Maf_bin","Dis_bin"))) 53 | newdata <- cast(md,Maf_bin+Dis_bin~value) 54 | colnames(newdata)[3] <- "num" 55 | 56 | ggplot(data = newdata, mapping = aes(x = factor(Maf_bin), y = newdata$num, fill = newdata$Dis_bin)) + geom_bar(stat = 'identity', position = 'fill')+ 57 | theme_classic() + 58 | labs(x = "Maf", y = "Propotion of SNPs(%)", title = "Allele frequency and Distance of SNPs for the 10M regions") + 59 | theme(legend.text = element_text(size = 14),legend.title=element_blank(),plot.title = element_text(hjust = 0.5,size=20),axis.title = element_text(size = 20),axis.text.x = element_text(size = 15,vjust = 0.5, hjust = 0.5,angle = 45),axis.text.y = element_text(size = 15)) 60 | 61 | 62 | -------------------------------------------------------------------------------- /02_Vmap3R/11_Statistic.sh: -------------------------------------------------------------------------------- 1 | #提取样本 2 | for chr in {05,06,11,12,17,18,23,24,29,30,35,36,41,42} 3 | do 4 | vcftools --gzvcf /data2/yafei/004_Vmap3/VCF/DD_VCF/chr0${chr}.vcf.gz --max-alleles 4 --keep /data2/yafei/004_Vmap3/VCF/taxa_DD.txt --recode --recode-INFO-all --stdout | bgzip -c > /data2/yafei/004_Vmap3/VCF/finalDD_vcf/chr0${chr}.vcf.gz & 5 | done b 6 | 7 | #过滤mac以及多等位基因-AABB 8 | for chr in {01,02,03,04,07,08,09,10,13,14,15,16,19,20,21,22,25,26,27,28,31,32,33,34,37,38,39,40} 9 | do 10 | vcftools --gzvcf /data2/yafei/004_Vmap3/VCF/AABB_VCF/chr0${chr}.vcf.gz --min-alleles 2 --max-alleles 2 --mac 2 --recode --recode-INFO-all --stdout | bgzip -c > /data2/yafei/004_Vmap3/VCF/chr0${chr}.mac.bialle.vcf.gz & 11 | done 12 | #过滤mac以及多等位基因-DD 13 | for chr in {05,06,11,12,17,18,23,24,29,30,35,36,41,42} 14 | do 15 | vcftools --gzvcf /data2/yafei/004_Vmap3/VCF/finalDD_vcf/chr0${chr}.vcf.gz --min-alleles 2 --max-alleles 2 --mac 2 --recode --recode-INFO-all --stdout | bgzip -c > /data2/yafei/004_Vmap3/VCF/finalDD_vcf/chr0${chr}.mac.bialle.vcf.gz 16 | done 17 | #统计-AABB 18 | java -jar /data2/yafei/004_Vmap3/VCF/CheckVcf.jar /data2/yafei/004_Vmap3/VCF/DD_VCF /data2/yafei/004_Vmap3/VCF/DD_VCF/checkVcf.txt 19 | 20 | 
#拆分VCF为INdel和SNP-AABB 21 | for chr in {01,02,03,04,07,08,09,10,13,14,15,16,19,20,21,22,25,26,27,28,31,32,33,34,37,38,39,40} 22 | do 23 | vcftools --gzvcf /data2/yafei/004_Vmap3/VCF/filt_AABB/chr0${chr}.mac.bialle.vcf.gz --remove-indels --recode --recode-INFO-all --stdout | bgzip -c > /data2/yafei/004_Vmap3/VCF/Snp/chr0${chr}.snp.vcf.gz & 24 | vcftools --gzvcf /data2/yafei/004_Vmap3/VCF/filt_AABB/chr0${chr}.mac.bialle.vcf.gz --keep-only-indels --recode --recode-INFO-all --stdout | bgzip -c > /data2/yafei/004_Vmap3/VCF/Indel/chr0${chr}.indel.vcf.gz & 25 | done 26 | #拆分VCF为INdel和SNP-DD 27 | for chr in {05,06,11,12,17,18,23,24,29,30,35,36,41,42} 28 | do 29 | vcftools --gzvcf /data2/yafei/004_Vmap3/VCF/finalDD_vcf/chr0${chr}.mac.bialle.vcf.gz --remove-indels --recode --recode-INFO-all --stdout | bgzip -c > /data2/yafei/004_Vmap3/VCF/Snp/chr0${chr}.snp.vcf.gz & 30 | vcftools --gzvcf /data2/yafei/004_Vmap3/VCF/finalDD_vcf/chr0${chr}.mac.bialle.vcf.gz --keep-only-indels --recode --recode-INFO-all --stdout | bgzip -c > /data2/yafei/004_Vmap3/VCF/Indel/chr0${chr}.indel.vcf.gz & 31 | done 32 | 33 | vcftools --gzvcf /data2/yafei/004_Vmap3/VCF/AABB_VCF/chr002.vcf.gz --min-alleles 2 --max-alleles 2 --mac 2 --recode --recode-INFO-all --stdout | bgzip -c > /data2/yafei/004_Vmap3/VCF/chr002.mac.bialle.vcf.gz & 34 | -------------------------------------------------------------------------------- /02_Vmap3R/13_VcfStatistic.r: -------------------------------------------------------------------------------- 1 | #过滤maf提取样本 2 | #AA 3 | for chr in {1,2,7,8,13,14,19,20,25,26,31,32,37,38} 4 | do 5 | vcftools --vcf /data2/yafei/Project3/V+W_E2/E2_all/chr${chr}.all.vcf --keep /data2/yafei/Project3/group/AA_taxa.txt --maf 0.0000001 --recode --stdout | bgzip -c > /data2/yafei/Project3/V+W_E2/E2_maf/AA/chr${chr}.mafAA.vcf.gz 6 | java -jar /data1/home/yafei/C36_checkQuality.jar --file /data2/yafei/Project3/V+W_E2/E2_maf/AA/chr${chr}.mafAA.vcf.gz --out /data2/yafei/Project3/V+W_E2/E2_maf/AA/chr${chr}_siteQCfile.txt --out2 /data2/yafei/Project3/V+W_E2/E2_maf/AA/chr${chr}_taxaQCfile.txt > nohupA 2>& 1 7 | done 8 | #BB 9 | for chr in {3,4,9,10,15,16,21,22,27,28,33,34,39,40} 10 | do 11 | vcftools --gzvcf /data1/home/yafei/Project3/Vmap1.1/chr${chr}.all.vcf.gz --keep /data2/yafei/Project3/group/SS_taxa.txt --max-missing 0.1 --maf 0.0000001 --recode --stdout | bgzip -c > /data1/home/yafei/Project3/Maf/mafBB/chr${chr}.mafBB.vcf.gz 12 | java -jar /data1/home/yafei/C36_checkQuality.jar --file /data1/home/yafei/Project3/Maf/mafBB/chr${chr}.mafBB.vcf.gz --out /data1/home/yafei/Project3/Maf/mafBB/chr${chr}_siteQCfile.txt --out2 /data1/home/yafei/Project3/Maf/mafBB/chr${chr}_taxaQCfile.txt > nohupB 2>& 1 13 | done 14 | #DD 15 | for chr in {5,6,11,12,17,18,23,24,29,30,35,36,41,42} 16 | do 17 | vcftools --vcf /data2/yafei/Project3/V+W_E2/E2_all/chr${chr}.all.vcf --keep /data2/yafei/Project3/group/DD_taxa.txt --maf 0.0000001 --recode --stdout | bgzip -c > /data2/yafei/Project3/V+W_E2/E2_maf/DD/chr${chr}.mafDD.vcf.gz 18 | java -jar /data1/home/yafei/C36_checkQuality.jar --file /data2/yafei/Project3/V+W_E2/E2_maf/DD/chr${chr}.mafDD.vcf.gz --out /data2/yafei/Project3/V+W_E2/E2_maf/DD/chr${chr}_siteQCfile.txt --out2 /data2/yafei/Project3/V+W_E2/E2_maf/DD/chr${chr}_taxaQCfile.txt > nohupD 2>& 1 19 | done 20 | #AABB 21 | for chr in {1,2,7,8,13,14,19,20,25,26,31,32,37,38,3,4,9,10,15,16,21,22,27,28,33,34,39,40} 22 | do 23 | vcftools --vcf /data2/yafei/Project3/V+W_E2/E2_all/chr${chr}.all.vcf --keep /data2/yafei/Project3/group/AABB_taxa.txt --maf 
0.0000001 --recode --stdout | bgzip -c > /data2/yafei/Project3/V+W_E2/E2_maf/AABB/chr${chr}.mafAABB.vcf.gz 24 | java -jar /data1/home/yafei/C36_checkQuality.jar --file /data2/yafei/Project3/V+W_E2/E2_maf/AABB/chr${chr}.mafAABB.vcf.gz --out /data2/yafei/Project3/V+W_E2/E2_maf/AABB/chr${chr}_siteQCfile.txt --out2 /data2/yafei/Project3/V+W_E2/E2_maf/AABB/chr${chr}_taxaQCfile.txt > nohupAB 2>& 1 25 | done 26 | #AABBDD 27 | for chr in {1..42} 28 | do 29 | vcftools --vcf /data2/yafei/Project3/V+W_E2/E2_all/chr${chr}.all.vcf --keep /data2/yafei/Project3/group/AABBDD_taxa.txt --maf 0.0000001 --recode --stdout | bgzip -c > /data2/yafei/Project3/V+W_E2/E2_maf/AABBDD/chr${chr}.mafAABBDD.vcf.gz 30 | java -jar /data1/home/yafei/C36_checkQuality.jar --file /data2/yafei/Project3/V+W_E2/E2_maf/AABBDD/chr${chr}.mafAABBDD.vcf.gz --out /data2/yafei/Project3/V+W_E2/E2_maf/AABBDD/chr${chr}_siteQCfile.txt --out2 /data2/yafei/Project3/V+W_E2/E2_maf/AABBDD/chr${chr}_taxaQCfile.txt > nohupABD 2>& 1 31 | done 32 | #Landrace 33 | for chr in {1..42} 34 | do 35 | vcftools --vcf /data2/yafei/Project3/V+W_E2/E2_all/chr${chr}.all.vcf --keep /data2/yafei/Project3/group/subspecies/sub_Landrace_new.txt --maf 0.0000001 --recode --stdout | bgzip -c > /data2/yafei/Project3/V+W_E2/E2_maf/Landrace/chr${chr}.mafLandrace.vcf.gz 36 | java -jar /data1/home/yafei/C36_checkQuality.jar --file /data2/yafei/Project3/V+W_E2/E2_maf/Landrace/chr${chr}.mafLandrace.vcf.gz --out /data2/yafei/Project3/V+W_E2/E2_maf/Landrace/chr${chr}_siteQCfile.txt --out2 /data2/yafei/Project3/V+W_E2/E2_maf/Landrace/chr${chr}_taxaQCfile.txt > nohupLand 2>& 1 37 | done 38 | #nohup sh merge_maf.sh & 39 | 40 | -------------------------------------------------------------------------------- /02_Vmap3R/14_Snp_Density.sh: -------------------------------------------------------------------------------- 1 | #统计VCF文件的snp的密度 2 | #hg19.bed 3 | chr1 248956422 4 | chr2 242193529 5 | chr3 198295559 6 | .... 
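# A minimal sketch (not part of the original pipeline) of how the two-column genome file shown above can be built, assuming a reference FASTA is available; ref.fa is a placeholder path.
# samtools faidx writes a .fai index whose first two columns are chromosome name and length, which is exactly the format bedtools makewindows -g expects.
samtools faidx ref.fa
cut -f1,2 ref.fa.fai > hg19.bed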
7 | 8 | bedtools makewindows -g hg19.bed -w 1000000 > windows.bed 9 | bedtools coverage -a windows.bed -b test.vcf -counts > coverage.txt 10 | 11 | for i in {1..42} 12 | do 13 | bedtools coverage -a ${i}.bed -b ../chr00${i}.vcf.gz -counts > chr00${i}.coverage.txt 14 | done 15 | 16 | #如果VCF文件过大,gz文件大于20G会报错,解决办法如下: 17 | 18 | for i in {1,3,4,5,7,8,9} 19 | #{1,3,7,9} 20 | do 21 | bedmap --echo --count --delim '\t' ${i}.bed <( gunzip -c ../chr00${i}.vcf.gz | vcf2bed --max-mem=100G --sort-tmpdir="~/large/dir" ) > chr00${i}.coverage.txt 22 | done 23 | 24 | for i in {10,11,13,14,15,16,17,19,20,21,22,23,25,26,27,28,29,31,32,33,34,35,37,38,39,40,41,42} 25 | #{10,13,14,15,16,22,23,25,26,27,28,29,31,32,33,34,35,37,38,39,40,41,42} 26 | do 27 | bedmap --echo --count --delim '\t' ${i}.bed <( gunzip -c ../chr0${i}.vcf.gz | vcf2bed --max-mem=100G --sort-tmpdir="~/large/dir" ) > chr0${i}.coverage.txt 28 | done 29 | 30 | 31 | -------------------------------------------------------------------------------- /03_Analysis/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yafei-Guo-coder/DumpNice/183d85aba75932aeacf7484386eb0cae484c7d9b/03_Analysis/.DS_Store -------------------------------------------------------------------------------- /03_Analysis/01_ChangeFileFormat_1.r: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------转换WW原始文件.R------------------------------------------------------------------------------------------------------------- 2 | #读取文件:203服务器 3 | setwd("/data1/home/yafei/SNP/WW") 4 | all <- read.table("Balfourier_et_al_Wheat_Phylogeography_DataS2.tab",header=T,stringsAsFactors=F) 5 | names <- read.table("741names.txt",header=F,stringsAsFactors=F) 6 | convers <- read.table("tabw280kAlleleConversionTable.tab",header=T,stringsAsFactors=F) 7 | geno <- all[,which(colnames(all) %in% names[,1])] 8 | header <- all[,1:5] 9 | data <- rbind(header,geno) 10 | #write.table(alldata,"741.tab",row.names=F,quote=F,sep="\t") 11 | #data <- read.table("741.tab",header=T,stringsAsFactors=F) 12 | #修改染色体号和snp位点 13 | chr <- paste(rep(paste("chr",1:7,sep=""),each=3),rep(c("A","B","D"),times=7), sep = "") 14 | pos <- c(471304005,438720154,452179604, 462376173, 453218924,462216879,454103970,448155269,476235359,452555092,451014251,451004620,453230519,451372872,451901030,452440856,452077197,450509124,450046986,453822637,453812268) 15 | for(i in 1:21){ 16 | data[which(data$chromosome == chr[i]),3] <- i*2-1 17 | data[which(data$chromosome == i*2-1 & data$snpPosition > pos[i]),3] <- i*2 18 | data[which(data$chromosome == i*2 & data$snpPosition > pos[i]),4] <- data[which(data$chromosome == i*2),4] - pos[i] 19 | orderd <- data[order(data[,3], data[,4]), ] 20 | } 21 | sub <- merge(data[,c(1:4)], convers,by.x="psId",by.y="probesetId",all.x=TRUE) 22 | sub_orderd <- sub[order(sub[,3], sub[,4]), ] 23 | #写42个(.convers)文件 24 | varName <- paste("chr",1:42,".convers",sep="") 25 | for(i in 1:42){ 26 | write.table(sub_orderd[which(sub_orderd$chromosome == i),], varName[i],row.names=F,quote=F,sep="\t") 27 | } 28 | 29 | #写42个(.tab)文件 30 | varName <- paste("chr",1:42,".tab",sep="") 31 | for(i in 1:42){ 32 | write.table(orderd[which(orderd$chromosome == i),], varName[i],row.names=F,quote=F,sep="\t") 33 | } 34 | #写42个(.bed)文件 35 | varName <- paste("chr",1:42,".bed",sep="") 36 | for(i in 1:42){ 37 | var <- cbind(orderd[which(orderd$chromosome == 
i),3],orderd[which(orderd$chromosome == i),4],orderd[which(orderd$chromosome == i),4]+1) 38 | write.table(var, varName[i],row.names=F,col.names=F,quote=F,sep="\t") 39 | } 40 | -------------------------------------------------------------------------------- /03_Analysis/10_IceData.r: -------------------------------------------------------------------------------- 1 | #冰川数据 2 | setwd("/Users/guoyafei/Downloads/stack/") 3 | data <- read.table("LR04stack.txt",header=T,stringsAsFactors = F,sep="\t") 4 | ggplot(data, aes(x=data$Time..ka., y=data$Benthic.d18O..per.mil.)) + 5 | geom_line() + 6 | scale_x_log10()+scale_y_reverse() + 7 | geom_hline(aes(yintercept=4), colour="#990000", linetype="dashed") -------------------------------------------------------------------------------- /03_Analysis/11_ReadDepth.sh: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------------------计算reads depth.sh-------------------------------------------------------------------------------------- 2 | #工作目录:204:/data2/xuebo/Projects/Speciation/More_accessions/hapScan/ 3 | #位点depth 4 | samtools depth -b bed_file sample.bam > sample.depth #bed 用来指定统计区间,运行后输出指定区间每一个碱基的测序深度。 5 | #区间depth 6 | bedtools makewindows -g genome.txt -w 10000000 -s 2000000 > windows.bed 7 | #bedtools makewindows用来自动生成划窗区间。-g genome.txt是要划分的基因组,格式为两列:染色体、染色体长度;-w 10000000为窗口大小为10M;-s 1000000为步长为1M,即窗口在染色体上每次向右平移1M的距离;windows.bed为输出文件,格式为三列:染色体、区间开始位点、区间结束位点。 8 | bedtools coverage -a windows.bed -b xxx.sort.bam > xxx.depth.txt 9 | #bedtools coverage对划分好的每个滑动窗口进行reads数(depth)的统计。-a windows为上一步划分好的区间;-b xxx.sort.bam为测序数据mapping到参考基因组的比对文件;xxx.depth.txt为统计结果的输出文件,格式为7列:染色体、区间起始位点、区间结束位点、该区间内的reads数、该区间内的碱基数、区间大小、该区间的平均覆盖度。 10 | samtools bedcov bed_file samplename.bam > sample.bedcov 11 | #输出的文件中计算了bed文件每一个区间的碱基总数,这里并不是reads的条数 12 | -------------------------------------------------------------------------------- /03_Analysis/12_BWA.sh: -------------------------------------------------------------------------------- 1 | ####################################################################### bwa ########################################################################## 2 | 3 | zcat file.fq.gz | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > file_sorted.fastq 4 | 5 | for ID in {"CRR061683","CRR061684"} 6 | do 7 | ref=/data1/home/xinyue/ref/abd_iwgscV1.fa.gz 8 | in1=/data1/home/xinyue/data/cleandata/${ID}_f1_paired.fq.gz 9 | in2=/data1/home/xinyue/data/cleandata/${ID}_r2_paired.fq.gz 10 | in="$in1 $in2" 11 | out=/data1/home/xinyue/data/bamdata 12 | echo -e "#!/bin/bash \n 13 | bwa mem -t 32 -R '@RG\tID:$ID\tSM:$ID\tLB:SL\tPL:IL' $ref $in | samtools view -S -b - > $out/$ID.bam " > NGS_bwa.$ID.sh 14 | sh NGS_bwa.$ID.sh 15 | done 16 | ## sort 17 | samtools sort -@ 2 -O bam -o $out/$ID.sorted.bam $out/$ID.bam 18 | ## MarkDuplicates and index 19 | java -jar /data1/home/xuebo/software/picard.jar MarkDuplicates INPUT=$out/$ID.sorted.bam OUTPUT=$out/$ID.rm.bam METRICS_FILE=$out/$ID.metrics.txt 20 | ## index 21 | samtools index $out/$ID.rm.bam 22 | 23 | -------------------------------------------------------------------------------- /04_GWAS/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yafei-Guo-coder/DumpNice/183d85aba75932aeacf7484386eb0cae484c7d9b/04_GWAS/.DS_Store -------------------------------------------------------------------------------- /04_GWAS/03_Gene_Domain.r: 
-------------------------------------------------------------------------------- 1 | ###画这个基因的各个部分的分类图 2 | a = 449254000 3 | b = 449260000 4 | plot(NULL, xlim=c(a, b), ylim=c(0, 5), main="",axes=FALSE, xlab="", ylab="", xaxs="i", yaxs="i") 5 | polygon(c(a, a, b,b), c(2.7,3,3,2.7),col="grey", border="grey") 6 | rect(449254955,2.7,449255062,3,col="#00868B",border = NA) 7 | rect(449255063,2.7,449255198,3,col="#00FFFF",border = NA) 8 | rect(449255307,2.7,449255709,3,col="#00FFFF",border = NA) 9 | rect(449256551,2.7,449257487,3,col="#00FFFF",border = NA) 10 | rect(449259163,2.7,449259241,3,col="#00868B",border = NA) 11 | 12 | -------------------------------------------------------------------------------- /04_GWAS/04_Gene_trait.r: -------------------------------------------------------------------------------- 1 | setwd("/Users/guoyafei/Documents/个人项目/傅老师/20210311/GWAS_sign_gene/") 2 | data <- read.table("VariantComponent.txt",header=T,stringsAsFactors = F) 3 | data$X1.428374375 <- as.factor(data$X1.428374375) 4 | data$X3.106681694 <- as.factor(data$X3.106681694) 5 | data$X3.107363563 <- as.factor(data$X3.107363563) 6 | data$X16.83205051 <- as.factor(data$X16.83205051) 7 | pdf("SNP_geno_trait.pdf") 8 | ggplot(data, aes(x = X1.428374375, y = Weight))+ 9 | geom_boxplot(position=position_dodge(0.5),width=0.6) + geom_point() +theme_classic() + labs(x="NGR5_1(1-428374375)") + theme(axis.title.x = element_text(size=15), axis.title.y = element_text(size=15),axis.text.x = element_text(size=15), axis.text.y = element_text(size=15)) 10 | ggplot(data, aes(x = X3.106681694, y = Weight))+ 11 | geom_boxplot(position=position_dodge(0.5),width=0.6) + geom_point() +theme_classic() + labs(x="PIF_2(3-106681694)") + theme(axis.title.x = element_text(size=15), axis.title.y = element_text(size=15),axis.text.x = element_text(size=15), axis.text.y = element_text(size=15)) 12 | #ggplot(data, aes(x = X3.107363563, y = Weight))+ 13 | # geom_boxplot(position=position_dodge(0.5),width=0.6) + geom_point() +theme_classic() + labs(x="PIF_2(3-107363563)") + theme(axis.title.x = element_text(size=15), axis.title.y = element_text(size=15),axis.text.x = element_text(size=15), axis.text.y = element_text(size=15)) 14 | ggplot(data, aes(x = X16.83205051, y = Weight))+ 15 | geom_boxplot(position=position_dodge(0.5),width=0.6) + geom_point() +theme_classic() + labs(x="GA2ox3-B1(16-83205051)") + theme(axis.title.x = element_text(size=15), axis.title.y = element_text(size=15),axis.text.x = element_text(size=15), axis.text.y = element_text(size=15)) 16 | dev.off() 17 | -------------------------------------------------------------------------------- /04_GWAS/08_Correlation.r: -------------------------------------------------------------------------------- 1 | library(gggplot2) 2 | library("ggpubr") 3 | 4 | data <- read.table("/Users/guoyafei/Desktop/相关性.txt", header=T, stringsAsFactors = F) 5 | sub <- data[!is.na(data$株高.郑老师),] 6 | sub2 <- sub[!is.na(sub$气孔导度),] 7 | ggscatter(data, x = "株高.郑老师", y = "气孔导度", 8 | add = "reg.line", conf.int = TRUE, 9 | cor.coef = TRUE, cor.method = "pearson", 10 | xlab = "Plant height", ylab = "Stomatal conductance") 11 | ggscatter(data, x = "ZX株高", y = "气孔导度", color = "orgCty", 12 | add = "reg.line", conf.int = TRUE, 13 | cor.coef = TRUE, cor.method = "pearson", 14 | xlab = "Tiller", ylab = "Stomatal conductance") 15 | ggplot(data=data, aes(x=ZX株高, y=气孔导度 ))+ 16 | geom_point(size=3,aes(x=ZX株高, y=气孔导度,color=orgCty ))+ 17 | stat_smooth(method="lm")+ 18 | xlab("plant height")+ 19 | ylab("Stomatal 
conductance")+ 20 | theme_classic() 21 | 22 | ggplot(data,aes(x = ZX株高,color=orgCty)) + 23 | geom_histogram(aes(x = ZX株高),stat="bin",binwidth=1, boundary = 0)+ 24 | theme_classic()+ 25 | xlab("plant height") 26 | 27 | geom_text(aes(label=as.character(round(..density..,2))),stat="bin",binwidth=0.01,boundary = 0,vjust=-0.5) 28 | 29 | 30 | -------------------------------------------------------------------------------- /05_Plot/Plot_Density.r: -------------------------------------------------------------------------------- 1 | install.packages('RIdeogram') 2 | 3 | require(RIdeogram) 4 | setwd("/Users/guoyafei/Documents/01_个人项目/04_VmapIII/12_Vmap3Test/06_Test/") 5 | gene_density <- read.table("SNP_density.txt", header=T, stringsAsFactors = F) 6 | wheat_karyotype <- read.table("Centrome.txt", header=T, stringsAsFactors = F) 7 | ideogram(karyotype = wheat_karyotype, overlaid = gene_density) 8 | convertSVG("chromosome.svg", device = "png") 9 | -------------------------------------------------------------------------------- /05_Plot/Plot_Fst.r: -------------------------------------------------------------------------------- 1 | #画每组比较的boxplot图 2 | library(stringr) 3 | path <- "/data2/yafei/Project3/FST_group/VmapData/A/boxplot" 4 | name <- c("5_10", "5_11", "5_12", "5_13", "5_14", "5_15", "5_16", "5_17", "5_18", "5_19", "5_20", "5_21", "5_22", "5_23", "5_24", "5_6", "5_7", "5_8", "5_9", "6_10", "6_11", "6_12", "6_13", "6_14", "6_15", "6_16", "6_17", "6_18", "6_19", "6_20", "6_21", "6_22", "6_23", "6_24", "6_7", "6_8", "6_9") 5 | fileNames <- dir(path) 6 | filePath <- sapply(fileNames, function(x){ 7 | paste(path,x,sep='/')}) 8 | data <- lapply(filePath, function(x){ 9 | read.table(x, header=T,stringsAsFactors=F)}) 10 | length(data) 11 | for(i in 1:length(data)){ 12 | data[[i]]$Name <- name[i] 13 | data[[i]]$Before <- str_split_fixed(data[[i]]$Name, "_", 2)[1] 14 | data[[i]]$After <- sapply(strsplit(as.character(data[[i]]$Name),'_'), "[", 2) 15 | } 16 | 17 | all <- data.frame() 18 | for(i in 1:length(data)){ 19 | all <- rbind(all, data[[i]]) 20 | } 21 | 22 | library(ggplot2) 23 | pdf(file = "FST_box_weight.pdf",width=200,height=100) #结果保存 24 | ggplot(all, aes(After, WEIGHTED_FST, fill=factor(Before))) + geom_boxplot() +facet_grid(.~After, scales = "free_x")+ 25 | scale_x_discrete() 26 | dev.off() 27 | pdf(file = "FST_box_mean.pdf",width=200,height=100) #结果保存 28 | ggplot(all, aes(After, MEAN_FST, fill=factor(Before))) + geom_boxplot() +facet_grid(.~After, scales = "free_x")+ 29 | scale_x_discrete() 30 | dev.off() 31 | 32 | #计算位点FST 33 | #Alineage 34 | #pop5-24: 35 | 36 | thread_num=15 37 | tempfifo="my_temp_fifo" 38 | mkfifo ${tempfifo} 39 | exec 6<>${tempfifo} 40 | rm -f ${tempfifo} 41 | for ((i=1;i<=${thread_num};i++)) 42 | do 43 | { 44 | echo 45 | } 46 | done >&6 47 | 48 | for i in {14,15,16,17,18,19,20,21,22,23} 49 | do 50 | for((j=i+1;j<=24;j++)) 51 | do 52 | { 53 | read -u6 54 | { 55 | vcftools --gzvcf /data1/home/yafei/Project3/Vmap1.1/Alineage.vcf.gz --weir-fst-pop /data2/yafei/Project3/FST_group/subgroups/"pop"$i".txt" --weir-fst-pop /data2/yafei/Project3/FST_group/subgroups/"pop"$j".txt" --out /data2/yafei/Project3/FST_group/VmapData/A/site_fst/"p_"$i"_"$j >> Anohup$i 2>& 1 56 | echo "" >&6 57 | } & 58 | } 59 | done 60 | done 61 | wait 62 | 63 | exec 6>&- 64 | 65 | #Blineage 66 | #pop5-24: 67 | 68 | for i in {14,15,16,17,18,19,20,21,22,23} 69 | do 70 | for((j=i+1;j<=24;j++)) 71 | do 72 | vcftools --gzvcf /data1/home/yafei/Project3/Vmap1.1/Alineage.vcf.gz --weir-fst-pop 
/data2/yafei/Project3/FST_group/subgroups/"pop"$i".txt" --weir-fst-pop /data2/yafei/Project3/FST_group/subgroups/"pop"$j".txt" --out /data2/yafei/Project3/FST_group/VmapData/A/site_fst/"p_"$i"_"$j >> Anohup$i 2>& 1 & 73 | done 74 | done 75 | 76 | #提取D site FST top 5%的位点 77 | for i in `ls *.weir.fst` 78 | do 79 | sed '/-nan/d' $i | awk '{if ($3 < 0 ) {$3=0;print $0} else{print $0}}' | sort -k3 -gr > $i.txt & 80 | done 81 | #计算每个文件的行数,并计算%5是多少行,重定向到*pos文件中 82 | for i in `ls *txt` 83 | do 84 | wc -l $i 85 | done 86 | 87 | #提取fst大于0.2的位点 88 | for i in `ls *txt` 89 | do 90 | awk '{if($3 > 0.2) {print $0} else {exit}}' $i > $i.pos2 & 91 | done 92 | 93 | #粘到Excel里 94 | 95 | cat *pos2 | awk '{print $1"\t"$2}' | sort | uniq |awk '{print $1"\t"$2-1"\t"$2}' |sort -k1,1n -k2,2n > D_highfst.bed 96 | 97 | awk '{if($1 == 10) print $2"\t"$3-1"\t"$3}' count.0.2thresh |sort -k1,1n -k2,2n > my.bed 98 | 99 | #提取 100 | yafei@203:/data2/yafei/Project3/FST_group/VmapData/D/site_fst/D_highfst.bed 101 | 102 | 103 | for chr in {1,2,7,8,13,14,19,20,25,26,31,32,37,38} 104 | for chr in {5,6,11,12,17,18,23,24,29,30,35,36,41,42} 105 | do 106 | bedtools intersect -a /data1/home/yafei/Project3/Vmap1.1/chr${chr}.all.vcf.gz -b /data2/yafei/Project3/FST_group/VmapData/D/site_fst/my/myD.bed -header > chr${chr}.D.vcf & 107 | done -------------------------------------------------------------------------------- /05_Plot/Plot_IBS.r: -------------------------------------------------------------------------------- 1 | setwd("/Users/guoyafei/Documents/Lulab/Project-2-Migration/基本统计/IBS/") 2 | data <- read.table("126Landrace_barley.txt",header=T,stringsAsFactors = F) 3 | library(ggplot2) 4 | library(ggmap) 5 | library(sp) 6 | library(maptools) 7 | library(maps) 8 | library(psych) 9 | visit.x<-data$Logititude 10 | visit.y<-data$Latitude 11 | mp <- NULL 12 | mapworld <- borders("world",colour = "gray50",fill="white") 13 | mp <- ggplot()+mapworld+ylim(-60,90) 14 | 15 | mp2 <- mp+geom_point(aes(x=data$Logititude,y=data$Latitude,color=data$wild_A),size=2)+scale_size(range=c(1,1))+ scale_colour_gradient(low = "lightblue",high = "darkblue")+ 16 | theme(legend.title=element_blank(),axis.text.x = element_text(size = 25), axis.title.x = element_text(size = 25),axis.text.y = element_text(size = 25),axis.title.y = element_text(size = 25)) 17 | mp2 <- mp+geom_point(aes(x=data$Logititude,y=data$Latitude,color=data$dome_A),size=2)+scale_size(range=c(1,1))+ scale_colour_gradient(low = "lightblue",high = "darkblue")+ 18 | theme(legend.title=element_blank(),axis.text.x = element_text(size = 25), axis.title.x = element_text(size = 25),axis.text.y = element_text(size = 25),axis.title.y = element_text(size = 25)) 19 | mp2 <- mp+geom_point(aes(x=data$Logititude,y=data$Latitude,color=data$free_A),size=2)+scale_size(range=c(1,1))+ scale_colour_gradient(low = "lightblue",high = "darkblue")+ 20 | theme(legend.title=element_blank(),axis.text.x = element_text(size = 25), axis.title.x = element_text(size = 25),axis.text.y = element_text(size = 25),axis.title.y = element_text(size = 25)) 21 | 22 | mp2 <- mp+geom_point(aes(x=data$Logititude,y=data$Latitude,color=data$wild_B),size=2)+scale_size(range=c(1,1))+ scale_colour_gradient(low = "lightblue",high = "darkblue")+ 23 | theme(legend.title=element_blank(),axis.text.x = element_text(size = 25), axis.title.x = element_text(size = 25),axis.text.y = element_text(size = 25),axis.title.y = element_text(size = 25)) 24 | mp2 <- 
mp+geom_point(aes(x=data$Logititude,y=data$Latitude,color=data$dome_B),size=2)+scale_size(range=c(1,1))+ scale_colour_gradient(low = "lightblue",high = "darkblue")+ 25 | theme(legend.title=element_blank(),axis.text.x = element_text(size = 25), axis.title.x = element_text(size = 25),axis.text.y = element_text(size = 25),axis.title.y = element_text(size = 25)) 26 | mp2 <- mp+geom_point(aes(x=data$Logititude,y=data$Latitude,color=data$free_B),size=2)+scale_size(range=c(1,1))+ scale_colour_gradient(low = "lightblue",high = "darkblue")+ 27 | theme(legend.title=element_blank(),axis.text.x = element_text(size = 25), axis.title.x = element_text(size = 25),axis.text.y = element_text(size = 25),axis.title.y = element_text(size = 25)) 28 | 29 | mp2 <- mp+geom_point(aes(x=data$Logititude,y=data$Latitude,color=data$Strangulata),size=2)+scale_size(range=c(1,1))+ scale_colour_gradient(low = "lightblue",high = "darkblue")+ 30 | theme(legend.title=element_blank(),axis.text.x = element_text(size = 25), axis.title.x = element_text(size = 25),axis.text.y = element_text(size = 25),axis.title.y = element_text(size = 25)) 31 | 32 | mp3 <- mp2+ guides(fill=guide_legend(title=NULL)) 33 | mp3 34 | -------------------------------------------------------------------------------- /05_Plot/Plot_MapPie.r: -------------------------------------------------------------------------------- 1 | #统一设置:注释内容及颜色 2 | library(pheatmap) 3 | require(reshape) 4 | require (rworldmap) 5 | require(rworldxtra) 6 | library(pophelper) 7 | library(ggplot2) 8 | require(gridExtra) 9 | setwd("/Users/guoyafei/Documents/个人项目/Project-2-Migration/migration/add_ZNdata/Structure_map/EA/") 10 | sfiles <- list.files(path="/Users/guoyafei/Documents/个人项目/Project-2-Migration/migration/add_ZNdata/Structure_map/EA/QMatrix/", full.names=T) 11 | slist <- readQ(files=sfiles) 12 | tabulateQ(qlist=readQ(sfiles)) 13 | summariseQ(tabulateQ(qlist=readQ(sfiles))) 14 | 15 | p1 <- plotQ(slist[1:5],returnplot=T,exportplot=F,quiet=T,basesize=11, 16 | showindlab=F,showyaxis=T,showticks=T,clustercol=brewer.pal(6, "Set2")) 17 | 18 | p1 <- plotQ(slist[1:5],returnplot=T,exportplot=F,quiet=T,basesize=11, 19 | sortind="all",showindlab=T,showyaxis=T,showticks=T,sharedindlab=T,clustercol=brewer.pal(6, "Set2")) 20 | 21 | p1 <- plotQ(slist[1:5],returnplot=T,exportplot=F,quiet=T,basesize=11, 22 | sortind="Cluster1",showindlab=T,showyaxis=T,showticks=T,sharedindlab=T) 23 | 24 | grid.arrange(p1$plot[[1]],p1$plot[[2]],p1$plot[[3]],p1$plot[[4]],p1$plot[[5]],nrow=5) 25 | 26 | grid.arrange(m,p1$plot[[1]],nrow=2) 27 | 28 | 29 | 30 | #聚类 31 | #根据经纬度给样本聚类 32 | data <- read.table("EA_taxa.txt", header=T, sep="\t", stringsAsFactors = F) 33 | library(cluster) 34 | library(factoextra) 35 | dataA <- data[,c(3,4)] 36 | data2 <- dataA[!is.na(dataA$Latitude),] 37 | df = scale(data2) 38 | 39 | #先求样本之间两两相似性 40 | result <- dist(df, method = "euclidean") 41 | #产生层次结构 42 | result_hc <- hclust(d = result, method = "ward.D2") 43 | 44 | data2$type <- cutree(result_hc, k=50) 45 | lat_mean <- tapply(data2[,1],data2$type,mean,na.rm = TRUE) 46 | lon_mean <- tapply(data2[,2],data2$type,mean,na.rm = TRUE) 47 | data2$cluster1 <- NA 48 | data2$cluster2 <- NA 49 | for(i in 1:138) { 50 | for(j in 1:50){ 51 | if(data2[i,3] == j ){ 52 | data2[i,4] <- as.numeric(lat_mean[j]) 53 | data2[i,5] <- as.numeric(lon_mean[j]) 54 | } 55 | } 56 | } 57 | write.table(data2,"BB_data2.txt",sep="\t",row.names = F,quote=F) 58 | 59 | #map 60 | require("RColorBrewer") 61 | 
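# A minimal, self-contained sketch (not from the original workflow) of how per-sample ancestry proportions could be aggregated
# to the geographic cluster centroids computed above, giving the wide Latitude/Longitude-by-ancestry table that mapPies() takes via nameZs.
# The objects toy_q and toy_geo below are invented purely for illustration.
toy_q   <- data.frame(Cluster1 = c(0.9, 0.2, 0.5, 0.1), Cluster2 = c(0.1, 0.8, 0.5, 0.9))  # toy Q matrix, one row per sample
toy_geo <- data.frame(Latitude = c(35, 35, 40, 40), Longitude = c(70, 70, 110, 110))       # toy cluster centroids, one row per sample
toy_pie <- aggregate(toy_q, by = list(Latitude = toy_geo$Latitude, Longitude = toy_geo$Longitude), FUN = mean)
toy_pie  # one row per site: Latitude, Longitude, then mean ancestry per cluster; usable as nameZs = c("Cluster1","Cluster2")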
setwd("/Users/guoyafei/Documents/个人项目/Project-2-Migration/migration/add_ZNdata/Structure_map/EA/") 62 | data <- read.table("EA_taxa.txt",header=T,sep="\t",stringsAsFactors = F) 63 | dataA <- data[,c(1,5,6,16,17)] 64 | colnames(dataA)[1:5] <- c("type","Latitude", "Longitude", "Sub.population.6", "value") 65 | wheat2 = dataA[!is.na(dataA$Latitude),] 66 | wheat = wheat2[!is.na(wheat2$Sub.population.6),] 67 | wheat_reshape <- wheat_reshape <- cast(wheat,Latitude+Longitude~Sub.population.6) 68 | wheat_reshape2 <- as.data.frame(wheat_reshape) 69 | 70 | #8 71 | mapPies(wheat_reshape2,xlim=c(60,140),ylim=c(35,40),nameX="Longitude",nameY="Latitude",nameZs=c('1','2',"3","4","5","6","7","8"),symbolSize=2.5, 72 | zColours=brewer.pal(8, "Set2"),barOrient='vert',oceanCol="#D1EEEE",landCol="#FFDAB9",main="A subgenome") 73 | #6 74 | mapPies(wheat_reshape2,xlim=c(60,140),ylim=c(35,40),nameX="Longitude",nameY="Latitude",nameZs=c('1','2',"3","4","5","6"),symbolSize=2.5, 75 | zColours=brewer.pal(6, "Set2"),barOrient='vert',oceanCol="#D1EEEE",landCol="#FFDAB9",main="A subgenome") 76 | #5 77 | mapPies(wheat_reshape2,xlim=c(60,140),ylim=c(35,40),nameX="Longitude",nameY="Latitude",nameZs=c('1','2',"3","4","5"),symbolSize=2.5, 78 | zColours=brewer.pal(5, "Set2"),barOrient='vert',oceanCol="#D1EEEE",landCol="#FFDAB9",main="A subgenome") 79 | #4 80 | mapPies(wheat_reshape2,xlim=c(60,140),ylim=c(35,36),nameX="Longitude",nameY="Latitude",nameZs=c('1','2',"3","4"),symbolSize=2.5, 81 | zColours=brewer.pal(4, "Set2"),barOrient='vert',oceanCol="#D1EEEE",landCol="#FFDAB9",main="A subgenome") 82 | #3 83 | mapPies(wheat_reshape2,xlim=c(60,140),ylim=c(35,40),nameX="Longitude",nameY="Latitude",nameZs=c('1','2','3'),symbolSize=2.5, 84 | zColours=brewer.pal(3, "Set2"),barOrient='vert',oceanCol="#D1EEEE",landCol="#FFDAB9",main="A subgenome") 85 | #2 86 | mapPies(wheat_reshape2,xlim=c(60,140),ylim=c(35,40),nameX="Longitude",nameY="Latitude",nameZs=c('1','2'), 87 | zColours=brewer.pal(2, "Set2"),symbolSize=2.5,barOrient='vert',oceanCol="#D1EEEE",landCol="#FFDAB9",main="A subgenome") 88 | par(mfcol=c(5,1)) -------------------------------------------------------------------------------- /05_Plot/Plot_Pi_02.r: -------------------------------------------------------------------------------- 1 | # PI.R 2 | # Libraries 3 | library(ggplot2) 4 | library(dplyr) 5 | library(tidyr) 6 | library(forcats) 7 | #library(hrbrthemes) 8 | library(viridis) 9 | 10 | # Load dataset from github 11 | data <- read.table("https://raw.githubusercontent.com/zonination/perceptions/master/probly.csv", header=TRUE, sep=",") 12 | 13 | # Data is at wide format, we need to make it 'tidy' or 'long' 14 | data <- data %>% 15 | gather(key="text", value="value") %>% 16 | mutate(text = gsub("\\.", " ",text)) %>% 17 | mutate(value = round(as.numeric(value),0)) %>% 18 | filter(text %in% c("Almost Certainly","Very Good Chance","We Believe","Likely","About Even", "Little Chance", "Chances Are Slight", "Almost No Chance")) 19 | 20 | # Plot 21 | p <- data %>% 22 | mutate(text = fct_reorder(text, value)) %>% # Reorder data 23 | ggplot( aes(x=text, y=value, fill=text, color=text)) + 24 | geom_violin(width=2.1, size=0.2) + 25 | scale_fill_viridis(discrete=TRUE) + 26 | scale_color_viridis(discrete=TRUE) + 27 | #theme_ipsum() + 28 | theme( 29 | legend.position="none" 30 | ) + 31 | coord_flip() + # This switch X and Y axis and allows to get the horizontal version 32 | xlab("") + 33 | ylab("Assigned Probability (%)") 34 | 35 | p 36 | p 
-------------------------------------------------------------------------------- /06_MarkDown/ASMC.md: -------------------------------------------------------------------------------- 1 | # ASMC 2 | > P. Palamara, J. Terhorst, Y. Song, A. Price. High-throughput inference of pairwise coalescence times identifies signals of selection and enriched disease heritability. Nature Genetics, 2018. 3 | ### Summary (TL;DR) 4 | ASMC is an efficient method for estimating genome-wide `pairwise coalescence times`. It can be run on SNP array or whole-genome sequencing (WGS) data. 5 | ### Input file formats 6 | * HAP file 7 | A text file with no header row and 2N+5 or 2N columns, where N is the number of samples. In the former case, the first five columns are: 8 | 1. Chromosome code 9 | 2. Variant ID 10 | 3. Base-pair coordinate; allele 0 (usually the minor allele, treated as REF when importing with 'ref-first') 11 | 4. Allele 1 (usually the major allele, treated as REF when importing with 'ref-last') 12 | These are followed by a pair of 0/1 haplotype columns for the first sample, then a pair of haplotype columns for the second sample, and so on 13 | * Sample file 14 | The sample information file accompanying a .gen or .bgen genotype dosage file or a .haps phased reference panel. It is loaded with --data/--sample and, in some cases, produced by --export. 15 | 16 | By default, the space-delimited .sample file emitted by --export has two header rows, then one row per sample with 4 or more fields: 17 | 18 | 19 | * map file 20 | 21 | 22 | -------------------------------------------------------------------------------- /06_MarkDown/GATK.md: -------------------------------------------------------------------------------- 1 | # GATK -------------------------------------------------------------------------------- /06_MarkDown/MCMC.md: -------------------------------------------------------------------------------- 1 | ## Markov chain Monte Carlo (MCMC) 2 | 3 | ### In one sentence: sample randomly in probability space to approximate the posterior distribution of a parameter of interest. 4 | 5 | * First, the "parameter of interest" is a number summarizing the phenomenon we care about. We usually estimate parameters with statistics; for example, to describe adult 6 | height, the parameter of interest could be the mean height in inches. A "distribution" is the mathematical representation of every possible value of the parameter and how likely we are to observe each value; its best-known instance is the bell curve: 7 | * ![98873image -21-](assets/98873image%20-21-.png) 8 | -------------------------------------------------------------------------------- /06_MarkDown/bayenv_method.md: -------------------------------------------------------------------------------- 1 | ### What does an environment-associated locus look like in the genome? 2 | * Such loci may show unusually strong correlations between allele frequency and key ecological variables, or large allele-frequency differences among geographic environments; these signatures allow us to identify candidate loci involved in local adaptation. 3 | ### What difficulties arise when trying to identify environment-associated loci genome-wide? 4 | * Differences in sample size, and the shared divergence history and gene flow among populations, can produce a degree of correlation in allele frequencies across populations (neutral correlation) that confounds the identification of adaptive loci. 5 | ### How is this overcome? 6 | * The authors developed a Bayesian method that estimates the empirical pattern of among-population allele-frequency covariance from a set of markers and then uses it as a null model when testing individual SNPs. 7 | ### What exactly does the method do? 
8 | * In the authors' model, the allele frequency in each population is drawn from a set of underlying population frequencies, and a transform of these population frequencies is assumed to follow a multivariate normal distribution. 9 | * The authors first estimate the covariance matrix of this multivariate normal distribution with Markov chain Monte Carlo. At each SNP they then compute a measure of support - a Bayes factor - for a model in which an environmental variable has a linear effect on the transformed allele frequencies, relative to the model given by the covariance matrix alone. This test outperforms existing correlation tests. 10 | * The authors also show that the method can identify SNPs with unusually large allele-frequency differentiation, providing a powerful alternative to pairwise or global FST-based approaches. 11 | * Software: http://www.eve.ucdavis.edu/gmcoop/ 12 | ![](assets/16442473466436.jpg) 13 | ### Problem statement 14 | * As shown in Figure 1, populations across a broad region are clustered with respect to both allele frequency and distance from the equator (latitude). The correlation between allele frequency and the environmental variable is therefore supported by far fewer independent observations than the 52 points in Figure 1. It is also unclear whether the allele-frequency variation in Figure 1 is due to sampling error from small sample sizes or to genetic drift. 15 | * The authors' model therefore accounts for differences in sample size and for the null correlation of allele frequencies when testing for correlations between environmental variables and allele frequencies. 16 | * To do this, a set of control loci is used to build a null model of how allele frequencies covary across populations. We can then test whether the correlation between allele frequency and an environmental variable at a locus of interest is larger than expected under this null model. The discussion focuses on codominant, biallelic markers such as SNPs, but the model can be extended to other marker types. The method can be applied to continuous or discrete environmental variables. 17 | ### Method 18 | Null model: 19 | Assume L independent SNPs in K populations. If L is very large, a large random subset of L can be used to estimate the null model to speed up computation. Environment-associated markers may be present among the L loci, but this small fraction of sites is expected to have little effect on the parameter estimates of the null model; L also serves as a good set of control markers. 20 | 21 | To overcome the constrained nature of allele frequencies 22 | To overcome the constrained nature of allele frequencies: 23 | Following Nicholson et al., subpopulation allele frequencies are assumed to be roughly normally distributed around an ancestral allele frequency el (02) print $2,$2,$3,$4;else print $0}' Alineage001_Free_threshing_phased_imputed.sample 4 | 5 | -------------------------------------------------------------------------------- /07_VMap3/05_TasselGWAS.sh: -------------------------------------------------------------------------------- 1 | #Data filtering and format conversion 2 | 3 | #Required software: plink, etc. 4 | 1. Filter by MAF>0.05 and missing rate<0.1 5 | plink --vcf test.imputed.vcf --maf 0.05 --geno 0.1 --recode vcf-iid --out test.filter --allow-extra-chr --autosome-num 42 6 | #(use these flags for non-numeric chromosome names such as ChrUn/Sy; converting chromosome names to numbers is recommended, and marker IDs in the VCF need to be assigned) 7 | 2. LD pruning of markers 8 | plink --vcf test.filter.vcf --indep-pairwise 50 10 0.2 --out test.filterLD --allow-extra-chr --autosome-num 42 (the .in file lists the retained marker IDs) 9 | 3. Extract the pruned markers 10 | plink --vcf test.filterLD.vcf --make-bed --extract test.filter.in --out test.filter.prune.in 11 | 4. Convert to structure/admixture format 12 | plink --bfile test.filter.prune.in --recode structure --out test.filter.prune.in #produces .recode.strct_in, the structure input format 13 | plink --bfile test.filter.prune.in --recode 12 --out test.filter.prune.in #produces .ped, the admixture input format 14 | 15 | #Population structure 16 | #Required software: structure/admixture, etc. 17 | #admixture is used here for simplicity, with k ranging from 1 to 13 18 | admixture --cv test.filter.ped 1 >>log.txt 19 | admixture --cv test.filter.ped 2 >>log.txt 20 | ....... 21 | admixture --cv test.filter.ped 13 >>log.txt 22 | wait 23 | grep "CV error" log.txt >k_1to13 24 | 25 | #Take the k with the smallest CV error (k=10); the test.filter.prune.in.10.Q output is used as input for the association analysis (drop the last column, add a header and sample IDs) 26 | 27 | #Kinship/PCA 28 | #Required software: tassel, etc. (PCA can be done with R packages; since population structure was already estimated, PCA was skipped here) 29 | run_pipeline.pl -importGuess test_impute.vcf -KinshipPlugin -method Centered_IBS -endPlugin -export test_kinship.txt -exportType SqrMatrix 30 | 31 | #Association analysis 32 | #Required software: tassel/GAPIT/FaSt-LMM, etc. (GAPIT is powerful but needs many R packages and makes plots automatically; FaSt-LMM is worth trying later) 33 | #The input file formats need some simple manual editing (see figure) 34 | 1. Convert VCF to hapmap format 35 | run_pipeline.pl -fork1 -vcf test.imputed.vcf -export test -exportType Hapmap -runfork1 36 | 2. Sort SNP positions 37 | run_pipeline.pl -SortGenotypeFilePlugin -inputFile test.hmp.txt -outputFile test_sort -fileType Hapmap 38 | #produces test.hmp.txt 39 | 3. 
GLM模型 40 | run_pipeline.pl -fork1 -h test_sort.hmp.txt -fork2 -r test.trait.txt -fork3 -q test.best_k10.txt -excludeLastTrait -combine4 -input1 -input2 -input3 -intersect -glm -export test_glm -runfork1 -runfork2 -runfork3 41 | 4.MLM模型 42 | run_pipeline.pl -fork1 -h test_sort.hmp.txt -fork2 -r test.trait.txt -fork3 -q test.best_k10.txt -excludeLastTrait -fork4 -k test_kinship.txt -combine5 -input1 -input2 -input3 -intersect -combine6 -input5 -input4 -mlm -mlmVarCompEst P3D -mlmCompressionLevel None -export test_mlm -runfork1 -runfork2 -runfork3 43 | -------------------------------------------------------------------------------- /07_VMap3/06_Selscan.r: -------------------------------------------------------------------------------- 1 | #selscan 2 | #参考资料 3 | #hapFLK对具有对瓶颈效应或迁移的群体有着更强大的检测选择信号的能力。 4 | #H12对于部分扫荡选择性清除(soft selective sweep)和全扫荡选择性清除(hard selective sweep)都有比较好的检测效率。 5 | #LASSI-Plus: A program written in C++ that implements the likelihood ratio Λ statistic of DeGiorgio and Szpiech (2021) for detecting selective sweeps and inferring their softness using the spatial distribution of distortions in the haplotype frequency spectrum. 6 | #SweepFinder2: A program written in C that can perform genomic scans for recent selective sweeps selection while controlling for background selection and mutation rate variation (DeGiorgio et al. 2016). 7 | #ihs 8 | selscan --ihs --threads 30 --gap-scale 200000 --max-gap 2000000 --maf 0.01 --vcf chr001.phased.imputed.pos.vcf.gz --map chr001.map4 --out chr001 --keep-low-freq 9 | #selscan --ihs --threads 30 --gap-scale 200000 --max-gap 2000000 --maf 0.01 --vcf chr001.phased.imputed.pos.maf005.vcf.gz --map chr001.maf005.map4 --out chr001.maf005 --keep-low-freq 10 | 11 | data <- read.table("/Users/guoyafei/公共资源/01_数据资源/iwgsc_refseqv1.0_mapping_data_42chr.txt",header=T,stringsAsFactors = F) 12 | don <- data %>% 13 | # Compute chromosome size 14 | group_by(chromosome) %>% 15 | lines(lowess(data$physicalPosition,data$geneticPosition)) %>% 16 | # Calculate cumulative position of each chromosome 17 | mutate(tot=cumsum(as.numeric(chr_len))-chr_len) %>% 18 | select(-chr_len) %>% 19 | # Add this info to the initial dataset 20 | left_join(gwasResults, ., by=c("CHR"="CHR")) %>% 21 | # Add a cumulative position of each SNP 22 | arrange(CHR, BP) %>% 23 | mutate( BPcum=BP+tot) 24 | 25 | -------------------------------------------------------------------------------- /07_VMap3/07_scam.r: -------------------------------------------------------------------------------- 1 | require(scam) 2 | testx=data1$physicalPosition 3 | length(testx) 4 | testy=data1$geneticPosition 5 | length(testy) 6 | fit = scam(testy~s(testx,k=30,bs="cr"), family=gaussian(link="identity")) 7 | predict(fit,data.frame(testx)) 8 | #plot(testx,testy) 9 | #lines(testx,predict(fit),col="red") 10 | 11 | plot(testx,predict(fit,data.frame(testx))) 12 | #head(data.frame(testx)) 13 | #z<-diff(predict(fit,data.frame(testx))) 14 | #sort(z) 15 | #zz<-data.frame(testx,testy,predict(fit,data.frame(testx))) 16 | #summary(zz) 17 | #zz%>% 18 | # gather(key="group",value="y",c(2,3)) %>% 19 | # ggplot(aes(x=testx,y=y,color=group))+geom_point(size=0.1) 20 | #zz%>%ggplot(aes(x=predict.fit..data.frame.testx..,y=testy))+geom_point() 21 | #test<-c(1157133,2410604) 22 | #approx(df1$physicalPosition,df1$geneticPosition,xout = test,method = "linear",rule = 1,ties = "ordered")$y 23 | #head(df1$physicalPosition) 24 | #zlist<- unique(df$chromosome) 25 | #for(p in zlist){ 26 | # print(p) 27 | #} 28 | #head(df) 29 | require(scam) 
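# A self-contained toy example (not part of the original analysis) of the idea used below: fit a monotone spline of
# genetic position on physical position with scam (loaded on the previous line), then predict genetic positions for
# arbitrary physical coordinates. All values here are simulated.
set.seed(42)
toy_phys <- sort(runif(200, 1, 7e8))
toy_gen  <- cumsum(runif(200, 0, 0.05))   # non-decreasing, like a genetic map in cM
toy_fit  <- scam(toy_gen ~ s(toy_phys, k = 20, bs = "mpi"), family = gaussian(link = "identity"))
predict(toy_fit, data.frame(toy_phys = c(1e8, 3e8, 5e8)))   # interpolated genetic positions
# A quick piecewise-linear cross-check with base R approx() gives similar interpolation:
approx(toy_phys, toy_gen, xout = c(1e8, 3e8, 5e8), method = "linear", ties = "ordered")$y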
30 | setwd("/Users/guoyafei/Documents/02_VmapIII/06_Selection/selscan") 31 | map <- read.table("/Users/guoyafei/公共资源/01_数据资源/iwgsc_refseqv1.0_mapping_data_42chr.txt",header=T,stringsAsFactors = F) 32 | filenames <- paste(c(paste("chr00",1:9,sep=""),paste("chr0",10:42,sep="")),".pos.txt",sep="") 33 | for(i in c(6)){ 34 | sub <- map[which(map$chromosome == i),] 35 | pos <- read.table(filenames[i],header=F,stringsAsFactors = F) 36 | colnames(pos) <- c("chr","physicalPosition") 37 | physicalPosition=sub$physicalPosition 38 | geneticPosition=sub$geneticPosition 39 | fit = scam(geneticPosition~s(physicalPosition,k=20,bs="mpi"), family=gaussian(link="identity")) 40 | #plot(physicalPosition,predict(fit,data.frame(geneticPosition))) 41 | ID <- paste(pos$chr,pos$physicalPosition,sep="-") 42 | physicalPosition <- pos$physicalPosition 43 | out <- data.frame(pos$chr,ID,predict(fit,data.frame(physicalPosition)),pos$physicalPosition) 44 | outfile <- paste("/Users/guoyafei/Documents/02_VmapIII/06_Selection/selscan/map/",filenames[i],sep="") 45 | write.table(out,outfile,col.names = F,quote = F,sep="\t", row.names = F) 46 | } 47 | 48 | #chr036.ihs.out 49 | data <- read.table("/Users/guoyafei/Desktop/chr036.ihs.out",header=F,stringsAsFactors = F) 50 | colnames(data)<-c("locus","pos","1_freq","ihh_1","ihh_0","ihs") 51 | data2 <- data[sort(sample(c(1:52691),10000)),] 52 | p<- ggplot(data2, aes(x=pos/1000000, y=ihs)) + 53 | # Show all points 54 | geom_point(alpha=0.8, size=1.3) + 55 | #scale_color_manual(values = rep(c("grey","grey", "skyblue", "skyblue"), 7)) + 56 | # custom X axis: 57 | #scale_x_continuous( label = axisdf$CHR, breaks= axisdf$center ) + 58 | #scale_y_continuous(expand = c(0, 0) ) + # remove space between plot area and x axis 59 | # Add highlighted points 60 | #geom_point(data=subset(don, is_highlight=="yes"), color="orange", size=2) + 61 | # Add label using ggrepel to avoid overlapping 62 | #geom_label_repel( data=subset(don, is_annotate=="yes"), aes(label=SNP), size=2) + 63 | # Custom the theme: 64 | theme_bw() + 65 | theme( 66 | legend.position="none", 67 | panel.border = element_blank(), 68 | panel.grid.major.x = element_blank(), 69 | panel.grid.minor.x = element_blank(), 70 | axis.text.x=element_text(size=15), 71 | axis.text.y=element_text(size=15), 72 | axis.title.y=element_text(size = 15), 73 | axis.title.x=element_text(size = 15), 74 | ) 75 | #scale_y_continuous(limits = c(0,7))+ 76 | #geom_point(data=point,aes(x=BPcum,y=-log10(P)),color="red") 77 | #geom_vline(xintercept = 528377322, colour="red",linetype=2, size=1) 78 | #geom_hline(yintercept = -log10(thresh[a,1]), colour="red",linetype=2, size=1)+ 79 | #geom_hline(yintercept = -log10(thresh[a,2]), colour="blue",linetype=2, size=1) 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /07_VMap3/08_Raxml-ng.r: -------------------------------------------------------------------------------- 1 | #Raxml-ng 2 | #yafei@204:/data2/yafei/004_Vmap3/Tree 3 | #run_pipeline.pl -Xms512m -Xmx5g -vcf lineageD.vcf.gz -export lineageD -exportType Phylip_Inter 4 | #raxml-ng --all --msa lineageAB.phy --seed 12346 --model GTR+G --bs-trees 100 --threads 80 5 | 6 | setwd("/Users/guoyafei/Desktop/") 7 | data <- read.table("/Users/guoyafei/Desktop/207_data.txt",header=F,stringsAsFactors = F) 8 | data <- read.table("/Users/guoyafei/Desktop/source_data_107.txt",header=F,stringsAsFactors = F) 9 | library(ggplot2) 10 | 11 | ggplot(data, aes(x = V1)) + 12 | 
geom_histogram(binwidth = 5, fill = "lightblue", colour = "black")+ 13 | xlim(1899,2010)+ 14 | theme_classic()+ 15 | theme(axis.text.x = element_text(size = 12), axis.title.x = element_text(size = 15),axis.text.y = element_text(size = 12),axis.title.y = element_text(size = 15))+ 16 | ylab("Count")+ 17 | xlab("Source Date (USDA)") 18 | 19 | 20 | -------------------------------------------------------------------------------- /07_VMap3/09_WheatGo.r: -------------------------------------------------------------------------------- 1 | #wheatGo 2 | library(ggplot2) 3 | library(RColorBrewer) 4 | display.brewer.all() 5 | col <- brewer.pal(n = 8, name = "Blues")[c(4,7)] 6 | col <- brewer.pal(n = 8, name = "Oranges")[c(4,7)] 7 | col <- brewer.pal(n = 8, name = "Greens")[c(4,7)] 8 | col <- brewer.pal(n = 8, name = "Red")[c(4,7)] 9 | 10 | setwd("/Users/guoyafei/Documents/02_VmapIII/08_Network/shufgene") 11 | setwd("/Users/guoyafei/Documents/02_VmapIII/08_Network/PPI") 12 | data <- read.table("go.txt",header=T,stringsAsFactors = F,sep="\t") 13 | colnames(data)[c(3,4,6)] <- c("Description", "Count", "p.adjust") 14 | ggplot(data=data)+ 15 | geom_bar(aes(x= Description, y=Count, fill=(-log10(p.adjust))), stat='identity') + 16 | coord_flip() + 17 | scale_fill_gradient(expression(-log(p.adjust)),low="#FDAE6B", high = "#D94801") + 18 | ylab("Gene number") + 19 | xlab("GO term description") + 20 | theme( 21 | axis.text.x=element_text(color="black",size=rel(0.3)), 22 | axis.text.y=element_text(color="black", size=rel(0.3)), 23 | axis.title.x = element_text(color="black", size=rel(5)), 24 | axis.title.y = element_blank(), 25 | legend.text=element_text(color="black",size=rel(0.2)), 26 | legend.title = element_text(color="black",size=rel(0.7)) 27 | )+ 28 | ylim(0,130)+ 29 | theme_bw()+ 30 | theme(panel.grid=element_blank(),panel.border=element_blank(),axis.line=element_line(size=1,colour="black")) 31 | 32 | 33 | #画整体的Go富集图---- 34 | path <- "/Users/guoyafei/Documents/02_VmapIII/08_Network/shufgene/Go" 35 | fileNames <- dir(path) 36 | filePath <- sapply(fileNames, function(x){ 37 | paste(path,x,sep='/')}) 38 | data <- lapply(filePath, function(x){ 39 | read.table(x, header=F, sep = "\t", stringsAsFactors=F)}) 40 | plot_list = list() 41 | for(i in 1:length(data)){ 42 | colnames(data[[i]])[c(3,4,6)] <- c("Description", "Count", "p.adjust") 43 | p <- ggplot(data=data[[i]])+ 44 | geom_bar(aes(x= Description, y=Count, fill=(-log(p.adjust))), stat='identity') + 45 | coord_flip() + 46 | #facet_grid(.~Name,scales="free") + 47 | #facet_wrap(~Name,ncol = 1)+ 48 | #A 49 | scale_fill_gradient(expression(-log(p.adjust)),low="#FDAE6B", high = "#D94801") + 50 | #B 51 | #scale_fill_gradient(expression(p.adjust),low="#9ECAE1", high = "#2171B5") + 52 | #D 53 | #scale_fill_gradient(expression(p.adjust),low="#A1D99B", high = "#238B45") + 54 | ylab("Gene number") + 55 | xlab("GO term description") + 56 | #expand_limits(y=c(0,8))+ 57 | #ylim(0,100)+ 58 | theme( 59 | axis.text.x=element_text(color="black",size=rel(0.3)), 60 | axis.text.y=element_text(color="black", size=rel(0.3)), 61 | axis.title.x = element_text(color="black", size=rel(5)), 62 | axis.title.y = element_blank(), 63 | legend.text=element_text(color="black",size=rel(0.2)), 64 | legend.title = element_text(color="black",size=rel(0.7)) 65 | )+ 66 | theme_bw()+ 67 | theme(panel.grid=element_blank(),panel.border=element_blank(),axis.line=element_line(size=1,colour="black")) 68 | 69 | plot_list[[i]] = p 70 | } 71 | 72 | name <- strsplit(names(data), ".txt") 73 | for (i in 
1:length(data)) { 74 | file_name = paste(name[[i]],".pdf",sep="") 75 | pdf(file_name,height = 12,width = 9) 76 | print(plot_list[[i]]) 77 | dev.off() 78 | } 79 | -------------------------------------------------------------------------------- /07_VMap3/10_SDS.r: -------------------------------------------------------------------------------- 1 | #SDS 2 | #66:/data1/home/yafei/004_Vmap3/SDS-master 3 | library(qqman) 4 | library(tidyverse) 5 | require(gridExtra) 6 | setwd("/Users/guoyafei/Documents/02_VmapIII/06_Selection/SDS") 7 | chr1 <- read.table("test.txt", header=T, stringsAsFactors = F) 8 | 9 | #manhattan 10 | gwasResults2 <- chr1 11 | thresh <- gwasResults2[order(gwasResults2$P),][nrow(gwasResults)*0.05,4] 12 | #gwasResults2$CHR = 1 13 | colnames(gwasResults2) <- c("CHR","BP","SNP","P") 14 | gwasResults <- gwasResults2 15 | don <- gwasResults %>% 16 | # Compute chromosome size 17 | group_by(CHR) %>% 18 | summarise(chr_len=max(BP)) %>% 19 | # Calculate cumulative position of each chromosome 20 | mutate(tot=cumsum(as.numeric(chr_len))-chr_len) %>% 21 | select(-chr_len) %>% 22 | # Add this info to the initial dataset 23 | left_join(gwasResults, ., by=c("CHR"="CHR")) %>% 24 | # Add a cumulative position of each SNP 25 | arrange(CHR, BP) %>% 26 | mutate( BPcum=BP+tot) 27 | axisdf <- don %>% group_by(CHR) %>% summarize(center=( max(BPcum) + min(BPcum) )/ 2) 28 | p <- ggplot(don, aes(x=BPcum, y=-log(P))) + 29 | geom_point( aes(color=as.factor(CHR)), alpha=0.8, size=1.3) + 30 | scale_color_manual(values = rep(c("grey","grey", "skyblue", "skyblue"), 7)) + 31 | scale_x_continuous( label = axisdf$CHR, breaks= axisdf$center ) + 32 | scale_y_continuous(expand = c(0, 0) ) + # remove space between plot area and x axis 33 | geom_hline(yintercept = -log(thresh), color = 'red', size = 0.5) + 34 | scale_y_continuous(limits = c(0, 100))+ 35 | theme_bw() + 36 | theme( 37 | legend.position="none", 38 | panel.border = element_blank(), 39 | panel.grid.major.x = element_blank(), 40 | panel.grid.minor.x = element_blank(), 41 | axis.text.x=element_text(size=15), 42 | axis.text.y=element_text(size=15), 43 | axis.title.y=element_text(size = 15), 44 | axis.title.x=element_text(size = 15), 45 | ) 46 | 47 | pdf("Landrace.pdf",height = 9,width = 9) 48 | grid.arrange(p[[1]],p[[2]],p[[3]],nrow=3) 49 | dev.off() 50 | pdf("Cultivar.pdf",height = 9,width = 9) 51 | grid.arrange(p[[4]],p[[5]],p[[6]],nrow=3) 52 | dev.off() 53 | 54 | 55 | #qq 56 | 57 | qq_dat <- data.frame(obs=-log10(sort(data$P,decreasing=FALSE)), 58 | exp=-log10( ppoints(length(data$P)))) 59 | ggplot(data=qq_dat,aes(exp,obs))+ 60 | geom_point(alpha=0.7,color="#7F7F7FFF")+ 61 | geom_abline(color="#D62728FF")+ 62 | xlab("Expected -log10(P-value)")+ 63 | ylab("Observed -log10(P-value)")+ 64 | scale_x_continuous(limits = c(0,7))+ 65 | scale_y_continuous(limits = c(0,7))+ 66 | #ggtitle(name)+ 67 | theme( 68 | plot.title = element_text(color="red", size=20, face="bold.italic"), 69 | axis.title = element_text(size=12,face="bold"), 70 | axis.text = element_text(face="bold",size=8,color = "black"), 71 | #axis.line = element_line(size=0.8,color="black"), 72 | axis.ticks= element_line(size=0.8,colour = "black"), 73 | panel.grid =element_blank(), 74 | panel.border = element_rect(fill=NA,size = 0.8), 75 | panel.background = element_blank()) 76 | pdf(name1,width = 6,height = 3) 77 | grid.arrange(p[[1]],p[[2]],nrow=1) 78 | dev.off() 79 | 80 | -------------------------------------------------------------------------------- /07_VMap3/15_Rareallele.R: 
-------------------------------------------------------------------------------- 1 | #Rare Allele 2 | setwd("/Users/guoyafei/Documents/02_VmapIII/10_RareAllele") 3 | data <- read.table("sum2.txt", header=T, stringsAsFactors = F) 4 | 5 | library(reshape2) 6 | md <- melt(data, id="id") 7 | ggplot(md, aes(x=id, y=value, color=id)) + 8 | geom_bar(stat = 'identity') + 9 | facet_grid(variable~.)+ 10 | #scale_color_manual(values = c("#66C2A5","#FC8D62","#8DA0CB","#E78AC3","#FFD92F")) + 11 | #geom_jitter(shape=16, position=position_jitter(0.2)) + 12 | xlab("")+ylab("")+theme_bw()+ 13 | theme(plot.title = element_text(color="red", size=10, face="bold.italic"),legend.position = "null",axis.text.x = element_text(size = 10), axis.title.x = element_text(size = 10),axis.text.y = element_text(size = 10),axis.title.y = element_text(size = 10)) 14 | -------------------------------------------------------------------------------- /07_VMap3/17_N-content.R: -------------------------------------------------------------------------------- 1 | setwd("/Users/guoyafei/Documents/02_Vmap3/before0219/02_DataSource/05_WEGA") 2 | library(ncdf4) # package for netcdf manipulation 3 | library(raster) # package for raster manipulation 4 | library(rgdal) # package for geospatial analysis 5 | library(ggplot2) 6 | library(ggmap) 7 | library(RColorBrewer) 8 | 9 | library(rasterVis) 10 | 11 | data <- read.table("369sample.txt",header=T, stringsAsFactors = F,sep="\t") 12 | 13 | N <- nc_open("/Users/guoyafei/公共资源/01_数据资源/TN5min.nc", write=FALSE, readunlim=TRUE, verbose=FALSE, 14 | auto_GMT=TRUE, suppress_dimvals=FALSE, return_on_error=FALSE ) 15 | lon <- ncvar_get(N, "lon") 16 | lat <- ncvar_get(N,"lat",verbose=F) 17 | t <- ncvar_get(N,"time") 18 | ndvi.array <- ncvar_get(N,"TN") 19 | dim(ndvi.array) 20 | filevalue <- ncatt_get(N,"TN") 21 | ndvi.array[ndvi.array==filevalue$value] <- NA 22 | ndvi.slice <- ndvi.array[,,1] 23 | dim(ndvi.slice) 24 | r <- raster(t(ndvi.slice), xmn=min(lon), xmx=max(lon), ymn=min(lat), ymx=max(lat), crs=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs+ towgs84=0,0,0")) 25 | r <- raster(t(ndvi.slice), xmn=min(lon), xmx=max(lon), ymn=min(lat), ymx=max(lat)) 26 | 27 | plot(r) 28 | 29 | points(landrace$Longitude,landrace$Latitude,pch = 0, bg = "grey",cex=0.5) 30 | points(cultivar$Longitude,cultivar$Latitude,pch = 8, bg = "grey",cex=0.5) 31 | 32 | sample <- data[,c(9,12,11)] 33 | sample <- sample[which(sample$Longitude != "NA"),] 34 | landrace <- sample[which(sample$type.USDA_8. == "Landrace"),] 35 | cultivar <- sample[which(sample$type.USDA_8. 
== "Cultivar"),] 36 | 37 | 38 | 39 | 40 | sample <- data[,c(12,11)] 41 | sample$NC <- extract(r, sample) 42 | sample$source <- data$DataSource 43 | sample <- sample[which(sample$NC != "NA"),] 44 | #绘图 45 | mp<-NULL 46 | mapworld<-borders("world",colour = "gray70",fill="gray70") 47 | mp<-ggplot()+mapworld+ylim(-60,90) +theme_classic() 48 | color <- brewer.pal(8, "Dark2")[c(1,2,3,4,6)] 49 | #AABBDD---- 50 | mp+geom_point(aes(x=sample$Longitude, y=sample$Latitude, color=sample$NC,shape = sample$source),size=1,alpha=0.7)+ 51 | scale_size(range=c(1,1))+ 52 | #scale_color_manual(values = color) + 53 | scale_colour_gradient2(low = "dark red", mid = "white", high = "dark blue", midpoint = 15)+ 54 | theme_classic()+ 55 | theme(axis.text.x = element_text(size = 20),axis.title.x = element_text(size = 20),axis.text.y = element_text(size = 20),axis.title.y = element_text(size = 20))+ 56 | #scale_fill_manual(values=c("#97FFFF", "#FFD700", "#FF6347", "#8470FF","#D8BFD8"), 57 | # breaks=c("AA", "SS", "DD", "AABB","AABBDD"), 58 | # labels=c("AA", "SS", "DD", "AABB","AABBDD"))+ 59 | theme(axis.text = element_blank()) + ## 删去所有刻度标签 60 | theme(axis.ticks = element_blank()) 61 | #theme(panel.grid =element_blank()) + 62 | #theme(axis.ticks.y = element_blank()) 63 | 64 | 65 | 66 | 67 | colr <- colorRampPalette(brewer.pal(11, 'RdYlBu')[3:9]) 68 | levelplot(r, 69 | margin=FALSE,# suppress marginal graphics 70 | ylim=c(-50,80), 71 | colorkey=list( 72 | space='bottom', # plot legend at bottom 73 | labels=list(at=-5:5, font=4) # legend ticks and labels 74 | ), 75 | par.settings=list( 76 | axis.line=list(col='transparent') # suppress axes and legend outline 77 | ), 78 | scales=list(draw=FALSE), # suppress axis labels 79 | col.regions=colr, # colour ramp 80 | at=seq(-10, 50)) 81 | #layer(sp.polygons(oregon, lwd=3)) # add oregon SPDF with latticeExtra::layer 82 | 83 | 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /07_VMap3/19_lm.R: -------------------------------------------------------------------------------- 1 | library(ggplot2) 2 | a <- read.table("/Users/guoyafei/Desktop/test.txt",header=T,stringsAsFactors = F,sep="\t") 3 | 4 | p <- ggplot(data = a, aes(x = Date, y = Proportion)) + 5 | geom_smooth(method = "lm", 6 | color="#558ebd", 7 | fill="lightgray", 8 | alpha=.7, 9 | size=0.8,se=T, 10 | formula = y~x) + 11 | geom_point(size=4, 12 | alpha=0.7, 13 | color="#fe654c")+ xlim(-10000,-6000)+ ylim(0,1)+ 14 | theme_bw() 15 | p 16 | 17 | lm_eqn <- function(a){ 18 | m <- lm(Proportion ~ Date, a); 19 | eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2, 20 | list(a = format(unname(coef(m)[1]), digits = 5), 21 | b = format(unname(coef(m)[2]), digits = 2), 22 | r2 = format(summary(m)$r.squared, digits = 3))) 23 | as.character(as.expression(eq)); 24 | } 25 | 26 | p1 <- p + 27 | geom_text(x = -9000, y = 0.75, 28 | label = lm_eqn(a), parse = TRUE) 29 | p1 30 | -------------------------------------------------------------------------------- /07_VMap3/22_recombinationRate.R: -------------------------------------------------------------------------------- 1 | #recombination rate 2 | library(ggplot2) 3 | setwd("/Users/guoyafei/公共资源/01_数据资源/1.0_recombination_rate") 4 | iwgsc1 <- read.table("iwgsc_refseqv1.0_mapping_data.txt", header=T,stringsAsFactors = F) 5 | ggplot(iwgsc1, aes(x=physicalPosition, y=geneticPosition,group=chromosome)) + 6 | geom_point(size=0.5)+ 7 | facet_wrap(vars(chromosome), nrow = 7,scales = "free")+ 8 | geom_line()+ 9 | theme_bw() 10 
| 11 | iwgsc2 <- read.table("iwgsc_refseqv1.0_recombination_rate.txt", header=T,stringsAsFactors = F) 12 | ggplot(iwgsc2, aes(x=intervalStart, y=nbOfSnps,group=chromosome)) + 13 | geom_point(size=0.3)+ 14 | facet_wrap(vars(chromosome), nrow = 7,scales = "free")+ 15 | geom_line()+ 16 | theme_bw() 17 | 18 | yafei <- read.table("RecombinationRate_1M_yafei.txt", header=T,stringsAsFactors = F) 19 | ggplot(yafei, aes(x=intervalStart, y=recombinationRate,group=chromosome)) + 20 | geom_point(size=0.3)+ 21 | facet_wrap(vars(chromosome), nrow = 7,scales = "free")+ 22 | geom_line()+ 23 | theme_bw() 24 | 25 | lipeng <- read.table("recombination_rate_geva_lipeng.txt", header=T,stringsAsFactors = F) 26 | ggplot(lipeng, aes(x=Position.bp., y=Map.cM.,group=Chromosome)) + 27 | geom_point(size=0.3)+ 28 | facet_wrap(vars(Chromosome), nrow = 7,scales = "free")+ 29 | geom_line()+ 30 | theme_bw() 31 | 32 | xue <- read.table("slidewindow_recomrate_updown_20M_xpclr_xuebo.txt", header=F,stringsAsFactors = F) 33 | colnames(xue) <- c("chromosome","intervalStart","intervalEnd","nbOfSnps","recombinationRate") 34 | ggplot(xue, aes(x=intervalStart, y=recombinationRate,group=chromosome)) + 35 | geom_point(size=0.3)+ 36 | facet_wrap(vars(chromosome), nrow = 7,scales = "free")+ 37 | geom_line()+ 38 | theme_bw() 39 | 40 | xue <- read.table("RecombinationRate_1M_xuebo.txt", header=F,stringsAsFactors = F) 41 | colnames(xue) <- c("chromosome","intervalStart","intervalEnd","nbOfSnps","recombinationRate") 42 | ggplot(xue, aes(x=intervalStart, y=nbOfSnps,group=chromosome)) + 43 | geom_point(size=0.3)+ 44 | facet_wrap(vars(chromosome), nrow = 7,scales = "free")+ 45 | geom_line()+ 46 | theme_bw() 47 | 48 | ggplot(D, mapping = aes(x = V4)) + 49 | geom_density( alpha = 0.5, color = "#FF7F00",fill="#FF7F00") + 50 | theme_bw() 51 | p <- ggplot(don, aes(x=BPcum, y=P)) + 52 | geom_point( aes(color=as.factor(CHR)), alpha=0.8, size=1) + 53 | scale_color_manual(values = rep(c("grey","grey", "skyblue", "skyblue"), 7)) + 54 | scale_x_continuous( label = axisdf$CHR, breaks= axisdf$center ) + 55 | scale_y_continuous(expand = c(0, 0) ) + 56 | theme_bw() + 57 | ylab("CLR")+ 58 | theme( 59 | legend.position="none", 60 | panel.border = element_blank(), 61 | panel.grid.major.x = element_blank(), 62 | panel.grid.minor.x = element_blank(), 63 | axis.text.x=element_text(size=12), 64 | axis.text.y=element_text(size=12), 65 | axis.title.y=element_text(size = 12), 66 | axis.title.x=element_text(size = 12), 67 | )+ 68 | geom_hline(yintercept=thresh1,color='#E69F00',linetype = "dashed") 69 | ggplot(all, aes(x=V1, y=V2, color=type)) + 70 | geom_bar(stat="identity",width=0.5,position='dodge') + 71 | xlab("")+ 72 | ylab("")+ 73 | theme_bw() 74 | 75 | -------------------------------------------------------------------------------- /07_VMap3/29_correlation.R: -------------------------------------------------------------------------------- 1 | library(ggplot2) 2 | genome <- read.table("/Users/guoyafei/公共资源/01_数据资源/chrConvertionRule.txt", header=T,stringsAsFactors = F) 3 | ge <- genome[c(2:43),c(1,3)] 4 | 5 | sum <- read.table("/Users/guoyafei/Desktop/Va.txt", header=F,stringsAsFactors = F) 6 | mean <- read.table("/Users/guoyafei/Desktop/mean2.txt", header=F,stringsAsFactors = F) 7 | snpnum <- read.table("/Users/guoyafei/Desktop/snpnum.txt", header=F,stringsAsFactors = F) 8 | 9 | all <- cbind(ge,sum)[,c(2,4)] 10 | 11 | colnames(all) <- c("ID","length","sum","mean","snpnum") 12 | all$V3 <- all$V2/10000000 13 | 14 | a <- summary(lm(as.numeric(sum[,2])~ 
as.numeric(snpnum[,2]))) 15 | a <- summary(lm(as.numeric(ge[,2])~ as.numeric(sum[,2]))) 16 | a <- summary(lm(as.numeric(ge[,2])~ as.numeric(snpnum[,2]))) 17 | 18 | x <- append(x,a$adj.r.squared) 19 | 20 | ggplot(data=all, aes(x=EndIndex.exclusive., y=V3))+ 21 | geom_point(size=3,aes(x=EndIndex.exclusive., y=V3))+ 22 | stat_smooth(method="lm")+ 23 | xlab("length")+ 24 | ylab("variance explained")+ 25 | theme_classic() -------------------------------------------------------------------------------- /07_VMap3/34_adapgene.R: -------------------------------------------------------------------------------- 1 | #adaptative genes 2 | setwd("~/Desktop/adapgene/") 3 | #画图:UpSetR 4 | library(UpSetR) 5 | Name <- c("elevation.txt","prec1.txt","prec2.txt","prec3.txt","prec4.txt","prec5.txt","prec6.txt","prec7.txt","prec8.txt","soil11.txt","soil12.txt","soil13.txt","soil2.txt","soil4.txt","soil5.txt","soil6.txt","soil7.txt","soil8.txt","soil9.txt","solar1.txt","solar2.txt","solar3.txt","temp10.txt","temp11.txt","temp1.txt","temp2.txt","temp3.txt","temp4.txt","temp5.txt","temp6.txt","temp7.txt","temp8.txt","temp9.txt") 6 | p <- list() 7 | for(i in c(1:length(Name))){ 8 | data <- read.table(Name[i],header=F,stringsAsFactors = F) 9 | p[[i]] <- data 10 | } 11 | listInput <- list(SF2 = p[[1]][,1], H12 = p[[2]][,1], TajimaD = p[[3]][,1]) 12 | upset(fromList(listInput), order.by = "freq",text.scale = c(2),point.size = 3.5, line.size = 2) 13 | 14 | data <- read.table("/Users/guoyafei/Desktop/adapgene/rep.jaccard.pos.crosstab", header=T, row.names = 1,stringsAsFactors = F) 15 | corrgram(data, order = F,lower.panel = panel.shade,upper.panel = panel.pie,text.panel = panel.txt,gap = 0.1, 16 | main="Correlogram of environment variables intercorrelations") 17 | 18 | data <- read.table("/Users/guoyafei/Desktop/adapgene/rep.num.pos.crosstab", header=T, row.names = 1,stringsAsFactors = F) 19 | corrgram(data, order = F,lower.panel = panel.shade,upper.panel = panel.pie,text.panel = panel.txt,gap = 0.1, 20 | main="Correlogram of environment variables intercorrelations") 21 | 22 | data <- read.table("/Users/guoyafei/Desktop/bio-RDA/471_baypass_taxa.Info", header=T, row.names = 1, stringsAsFactors = F) 23 | sub <- data[,c(3,6:40)] 24 | sub2 <- sub[,c(1,4,8,11,13,24,34,36)] 25 | 26 | corrgram(sub2, order = F,lower.panel = panel.shade,upper.panel = panel.pie,text.panel = panel.txt,gap = 0.1, 27 | main="Correlogram of environment variables intercorrelations") 28 | 29 | library(usdm) 30 | vif(sub) 31 | v1 <- vifcor(sub2,th=0.9) 32 | 33 | v1 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /07_VMap3/36_checksample.R: -------------------------------------------------------------------------------- 1 | setwd("/Users/guoyafei/Documents/02_Vmap3/before0219/14_VMap3QC/QC") 2 | 3 | 4 | -------------------------------------------------------------------------------- /07_VMap3/37_traitgwas.R: -------------------------------------------------------------------------------- 1 | #-----qq---- 2 | library(ggplot2) 3 | library(qqman) 4 | library(tidyverse) 5 | setwd("/Users/guoyafei/Desktop/traitgwas/pdf") 6 | path <- "/Users/guoyafei/Desktop/traitgwas/plot/tasselgwas/cultivar" 7 | fileNames <- dir(path) 8 | filePath <- sapply(fileNames, function(x){ 9 | paste(path,x,sep='/')}) 10 | data <- lapply(filePath, function(x){ 11 | read.table(x, header=T,stringsAsFactors = F)}) 12 | p <- list() 13 | q <- list() 14 | data <- 
read.table("cultivar.test.txt",header=F,stringsAsFactors = F) 15 | colnames(data) <- c("SNP","CHR","BP","P") 16 | for ( i in c(1:7)){ 17 | #qq plot 18 | n1 <- strsplit( names(data)[i], ".shuf50k.")[[1]][1] 19 | filename1 <- paste(n1,"cultivar.tassel.qq",sep=".") 20 | filename2 <- paste(n1,"cultivar.tassel",sep=".") 21 | #colnames(data[[i]]) <- c("CHR","SNP","BP","P") 22 | colnames(data[[i]]) <- c("SNP","CHR","BP","P") 23 | qq_dat <- data.frame(obs=-log10(sort(data$P,decreasing =FALSE)), 24 | exp=-log10( ppoints(length(data$P)))) 25 | q[[i]]q <- ggplot(data=qq_dat,aes(exp,obs))+ 26 | geom_point(alpha=0.7,color="#7F7F7FFF")+ 27 | geom_abline(color="#D62728FF")+ 28 | xlab("Expected -log10(P-value)")+ 29 | ylab("Observed -log10(P-value)")+ 30 | scale_x_continuous(limits = c(0,7))+ 31 | scale_y_continuous(limits = c(0,7))+ 32 | #ggtitle(name)+ 33 | theme( 34 | plot.title = element_text(color="red", size=20, face="bold.italic"), 35 | axis.title = element_text(size=12,face="bold"), 36 | axis.text = element_text(face="bold",size=8,color = "black"), 37 | #axis.line = element_line(size=0.8,color="black"), 38 | axis.ticks= element_line(size=0.8,colour = "black"), 39 | panel.grid =element_blank(), 40 | panel.border = element_rect(fill=NA,size = 0.8), 41 | panel.background = element_blank()) 42 | pdf(paste(filename1,".pdf",sep=""),height = 3,width = 3) 43 | print(q[[i]]) 44 | dev.off() 45 | #manhuttan plot 46 | gwasResults <- data[[i]] 47 | don <- gwasResults %>% 48 | group_by(CHR) %>% 49 | summarise(chr_len=max(BP)) %>% 50 | mutate(tot=cumsum(as.numeric(chr_len))-chr_len) %>% 51 | select(-chr_len) %>% 52 | left_join(gwasResults, ., by=c("CHR"="CHR")) %>% 53 | arrange(CHR, BP) %>% 54 | mutate( BPcum=BP+tot) 55 | axisdf <- don %>% group_by(CHR) %>% summarize(center=( max(BPcum) + min(BPcum) )/ 2) 56 | p[[i]] p <- ggplot(don, aes(x=BPcum, y=-log10(P))) + 57 | geom_point( aes(color=as.factor(CHR)), alpha=0.5, size=1) + 58 | scale_color_manual(values = rep(c("#999999","#999999", "#0072B2", "#0072B2","#E69F00", "#E69F00"), 7)) + 59 | # custom X axis: 60 | scale_x_continuous( label = axisdf$CHR, breaks= axisdf$center ) + 61 | scale_y_continuous(expand = c(0, 0) ) + 62 | theme_bw() + 63 | ylim(0,7)+ 64 | theme( 65 | legend.position="none", 66 | panel.border = element_blank(), 67 | panel.grid.major.x = element_blank(), 68 | panel.grid.minor.x = element_blank(), 69 | axis.text.x=element_text(size=8), 70 | axis.text.y=element_text(size=10), 71 | axis.title.y=element_text(size = 10), 72 | axis.title.x=element_text(size = 10), 73 | )+ 74 | scale_y_continuous(limits = c(0,7))+ 75 | geom_hline(yintercept = -log10(0.00001), colour="red",linetype=2, size=1) 76 | pdf(paste(filename2,".pdf",sep=""),height = 2,width = 9.6) 77 | 78 | print(p[[i]]) 79 | dev.off() 80 | } 81 | 82 | pdf("cultivar.test.pdf",height = 2,width = 9.6) 83 | print(p) 84 | dev.off() 85 | -------------------------------------------------------------------------------- /07_VMap3/38_bayesR.R: -------------------------------------------------------------------------------- 1 | #bayesR 2 | data <- read.table("bayesR.txt", header=F,stringsAsFactors = F) 3 | data2 <- read.table("chrom.txt", header=T,stringsAsFactors = F) 4 | 5 | ggplot(sub, aes(length, Va,color=type)) + 6 | geom_point( size=3)+ 7 | scale_color_manual(values = c("#999999","#0072B2","#E69F00"))+ 8 | theme_bw() 9 | -------------------------------------------------------------------------------- /07_VMap3/46_fastcall2测试.R: 
-------------------------------------------------------------------------------- 1 | #FastCall2 2 | setwd("/Users/guoyafei/Desktop/FC2/compare/2.0") 3 | 4 | library(ggplot2) 5 | input <- paste("chr",1:42,".all.txt",sep="") 6 | output <- paste("chr",1:42,".all.pdf",sep="") 7 | for(i in c(1:42)){ 8 | data <- read.table(input[i], header=F, stringsAsFactors = F) 9 | pdf(output[i]) 10 | p <- ggplot(data,aes(V2,fill=V1))+ 11 | geom_histogram(bins=10,aes(x=V2, y=..density..),position="identity",alpha = 0.5)+ 12 | theme_bw() 13 | plot(p) 14 | dev.off() 15 | } 16 | 17 | 18 | data <- read.table("/Users/guoyafei/Desktop/FC2/compare/2.0_raw/file2.0_raw.all.txt", header=F,stringsAsFactors = F) 19 | ggplot(data,aes(V2,fill=V1))+ 20 | geom_histogram(bins=10,aes(x=V2, y=..density..),position = 'dodge',alpha = 0.9)+ 21 | scale_fill_manual(values = c("#FDC086","#BEAED4")) + 22 | theme_bw() 23 | 24 | data <- read.table("/Users/guoyafei/Desktop/FC2/compare/2.1/file2.1.all.txt", header=F,stringsAsFactors = F) 25 | ggplot(data,aes(V2,fill=V1))+ 26 | geom_histogram(bins=10,aes(x=V2, y=..density..),position = 'dodge',alpha = 0.9)+ 27 | scale_fill_manual(values = c("#FDC086","#BEAED4")) + 28 | theme_bw() 29 | 30 | data <- read.table("/Users/guoyafei/Desktop/FC2/compare/filegatk.all.txt", header=F,stringsAsFactors = F) 31 | ggplot(data,aes(V2,fill=V1))+ 32 | geom_histogram(bins=15,aes(x=V2, y=..density..),position="identity",alpha = 0.5)+ 33 | theme_bw() -------------------------------------------------------------------------------- /07_VMap3/47-物种分布模型.R: -------------------------------------------------------------------------------- 1 | #物种分布模型 2 | library(gradientForest) 3 | library(RColorBrewer) 4 | library(rasterVis) 5 | library(LEA) 6 | library(raster) 7 | library(adegenet) 8 | library(maps) 9 | library(dismo) 10 | library(gplots) 11 | library(gdistance) 12 | 13 | -------------------------------------------------------------------------------- /07_VMap3/48_lassip.R: -------------------------------------------------------------------------------- 1 | library(qqman) 2 | library(tidyverse) 3 | setwd("/Users/guoyafei/Desktop/lassip/pdf") 4 | path <- "/Users/guoyafei/Desktop/lassip/plot" 5 | fileNames <- dir(path) 6 | filePath <- sapply(fileNames, function(x){ 7 | paste(path,x,sep='/')}) 8 | data <- lapply(filePath, function(x){ 9 | read.table(x, header=T,stringsAsFactors = F)}) 10 | 11 | #threshold 12 | #library(gdata) 13 | #thresh <- read.xls("thresh.xlsx",sheet=1,row.name=1,na.strings=c("NA","#DIV/0!")) 14 | for (i in seq(1:length(data))){ 15 | filename <- strsplit(names(data)[i], "_")[[1]][1] 16 | p <- list() 17 | gwasResults <- data[[i]] 18 | colnames(gwasResults)[1:2] <- c("CHR", "BP") 19 | don <- gwasResults %>% 20 | group_by(CHR) %>% 21 | summarise(chr_len=max(BP)) %>% 22 | mutate(tot=cumsum(as.numeric(chr_len))-chr_len) %>% 23 | select(-chr_len) %>% 24 | left_join(gwasResults, ., by=c("CHR"="CHR")) %>% 25 | arrange(CHR, BP) %>% 26 | mutate( BPcum=BP+tot) 27 | axisdf <- don %>% group_by(CHR) %>% summarize(center=( max(BPcum) + min(BPcum) )/ 2) 28 | p[[i]] <- ggplot(don, aes(x=BPcum, y=pop1_T)) + 29 | geom_point( aes(color=as.factor(CHR)), alpha=0.8, size=1.3) + 30 | scale_color_manual(values = rep(c("grey", "#56B4E9", "#D55E00"), 7)) + 31 | # custom X axis: 32 | scale_x_continuous( label = axisdf$CHR, breaks= axisdf$center ) + 33 | scale_y_continuous(expand = c(0, 0) ) + 34 | theme_bw() + 35 | theme( 36 | legend.position="none", 37 | panel.border = element_blank(), 38 | panel.grid.major.x = 
element_blank(), 39 | panel.grid.minor.x = element_blank(), 40 | axis.text.x=element_text(size=8), 41 | axis.text.y=element_text(size=10), 42 | axis.title.y=element_text(size = 10), 43 | axis.title.x=element_text(size = 10), 44 | ) 45 | #scale_y_continuous(limits = c(0,7))+ 46 | #geom_hline(yintercept = -log10(0.0699), colour="red",linetype=2, size=1) 47 | pdf(paste(filename,"_T.pdf",sep=""),height = 3,width = 6.5) 48 | print(p[[i]]) 49 | dev.off() 50 | } 51 | 52 | all <- data[[11]] 53 | ggplot(all, aes(pop1_g2g1, pop1_g123)) + 54 | geom_point(size=2, alpha = 0.5,colour = "grey",shape = 20) + 55 | #geom_point(data = sub_gene1, aes(V2, V3, size=2, alpha = 0.5,colour = "red")) + 56 | geom_vline(xintercept=0.059,color='red',linetype = "dotted")+ 57 | #geom_hline(yintercept=0.059,color='red',linetype = "dotted")+ 58 | #geom_text_repel(data = sub_gene1,aes(V2, V3, label = name),max.overlaps = 100) + 59 | xlab("H2/H1")+ 60 | ylab("H12")+ 61 | theme_bw()+ 62 | theme(legend.position="none",legend.title=element_blank(),axis.text.x = element_text(size = 15), axis.title.x = element_text(size = 15),axis.text.y = element_text(size = 15),axis.title.y = element_text(size = 15)) 63 | 64 | 65 | -------------------------------------------------------------------------------- /07_VMap3/49_RAiSD.R: -------------------------------------------------------------------------------- 1 | library(qqman) 2 | library(tidyverse) 3 | setwd("/Users/guoyafei/Desktop/RAiSD/pdf") 4 | path <- "/Users/guoyafei/Desktop/RAiSD/plot" 5 | fileNames <- dir(path) 6 | filePath <- sapply(fileNames, function(x){ 7 | paste(path,x,sep='/')}) 8 | data <- lapply(filePath, function(x){ 9 | read.table(x, header=F,stringsAsFactors = F)}) 10 | 11 | #threshold 12 | #library(gdata) 13 | #thresh <- read.xls("thresh.xlsx",sheet=1,row.name=1,na.strings=c("NA","#DIV/0!")) 14 | for (i in seq(1:length(data))){ 15 | filename <- strsplit(names(data)[i], "_")[[1]][1] 16 | p <- list() 17 | gwasResults <- data[[i]] 18 | colnames(gwasResults) <- c("CHR", "BP", "P") 19 | don <- gwasResults %>% 20 | group_by(CHR) %>% 21 | summarise(chr_len=max(BP)) %>% 22 | mutate(tot=cumsum(as.numeric(chr_len))-chr_len) %>% 23 | select(-chr_len) %>% 24 | left_join(gwasResults, ., by=c("CHR"="CHR")) %>% 25 | arrange(CHR, BP) %>% 26 | mutate( BPcum=BP+tot) 27 | axisdf <- don %>% group_by(CHR) %>% summarize(center=( max(BPcum) + min(BPcum) )/ 2) 28 | p[[i]] <- ggplot(don, aes(x=BPcum, y=P)) + 29 | geom_point( aes(color=as.factor(CHR)), alpha=0.8, size=1.3) + 30 | scale_color_manual(values = rep(c("grey", "#56B4E9", "#D55E00"), 7)) + 31 | # custom X axis: 32 | scale_x_continuous( label = axisdf$CHR, breaks= axisdf$center ) + 33 | scale_y_continuous(expand = c(0, 0) ) + 34 | theme_bw() + 35 | theme( 36 | legend.position="none", 37 | panel.border = element_blank(), 38 | panel.grid.major.x = element_blank(), 39 | panel.grid.minor.x = element_blank(), 40 | axis.text.x=element_text(size=8), 41 | axis.text.y=element_text(size=10), 42 | axis.title.y=element_text(size = 10), 43 | axis.title.x=element_text(size = 10), 44 | ) 45 | #scale_y_continuous(limits = c(0,7))+ 46 | #geom_hline(yintercept = -log10(0.0699), colour="red",linetype=2, size=1) 47 | pdf(paste(filename,".pdf",sep=""),height = 3,width = 6.5) 48 | print(p[[i]]) 49 | dev.off() 50 | } 51 | 52 | #temp selection regions 53 | library(UpSetR) 54 | 55 | setwd("/Users/guoyafei/Desktop/RAiSD/") 56 | #data <- read.table("temp.all.plot.txt", header=F, stringsAsFactors = F) 57 | 58 | EU <- read.table("temp.EU.gene.plot.txt", 
header=F,stringsAsFactors = F) 59 | EU <- EU[,1] 60 | #EU <- data[,2] 61 | #EU <- EU[which(EU!="0")] 62 | WA <- read.table("temp.WA.gene.plot.txt", header=F,stringsAsFactors = F) 63 | WA <- WA[,1] 64 | #WA <- data[,2] 65 | #WA <- WA[which(WA!="0")] 66 | CA <- read.table("temp.CA.gene.plot.txt", header=F,stringsAsFactors = F) 67 | CA <- CA[,1] 68 | #CA <- data[,3] 69 | #CA <- CA[which(CA!="0")] 70 | SA <- read.table("temp.SA.gene.plot.txt", header=F,stringsAsFactors = F) 71 | SA <- SA[,1] 72 | #SA <- data[,4] 73 | #SA <- SA[which(SA!="0")] 74 | EA <- read.table("temp.EA.gene.plot.txt", header=F,stringsAsFactors = F) 75 | EA <- EA[,1] 76 | #EA <- data[,5] 77 | #EA <- EA[which(EA!="0")] 78 | AM <- read.table("temp.AM.gene.plot.txt", header=F,stringsAsFactors = F) 79 | AM <- AM[,1] 80 | #AM <- data[,6] 81 | #AM <- EA[which(EA!="0")] 82 | 83 | 84 | all <- list( 85 | EU <- EU, 86 | WA <- WA, 87 | CA <- CA, 88 | SA <- SA, 89 | EA <- EA 90 | ) 91 | 92 | names(all) <- c("EU","WA","CA","SA","EA") 93 | upset(fromList(all), order.by = "freq") 94 | 95 | upset(fromList(all), keep.order = TRUE,text.scale = c(1),point.size = 1.5, line.size = 1) 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /07_VMap3/51_LinkNe.R: -------------------------------------------------------------------------------- 1 | #LinkNe 2 | setwd("/Users/guoyafei/Desktop/LinkNe/") 3 | library(ggplot2) 4 | data <- read.table("ne.merge.25pop", header=F,stringsAsFactors = F) 5 | sub <- data[which(data$V1 %in% c(400,200)),] 6 | sub_decline <- sub[which(sub$V6 %in% c("pop1","pop2","pop3","pop4","pop6","pop8","pop9","pop10","pop11","pop13","pop14","pop16","pop17","pop19","pop20","pop22","pop23")),] 7 | 8 | sub <- data[which(data$V1 %in% c(400,200)),] 9 | sub_no_decline <- sub[which(sub$V6 %in% c("pop5","pop7","pop12","pop15","pop18","pop21","pop24","pop25")),] 10 | 11 | pdf("test1.pdf",height=30,width=10) 12 | ggplot(sub_no_decline, aes(V1, V3,type=V6)) + 13 | geom_point() + 14 | geom_line() + 15 | theme_classic()+ 16 | ylim(150,1400)+ 17 | labs(x = 'generation', y = 'Ne') 18 | print(p) 19 | dev.off() 20 | 21 | a <- paste("pop",c(9,16,17,25,2,7,20,19),sep="") 22 | sub <- data[which(data$V3 %in% a),] 23 | b <- paste("pop",c(9,16,17,25,2,7,20,19,15,22,1,5,8,11,6,13,4,10.21),sep="") 24 | sub <- data[which(data$V3 %in% b),] 25 | -------------------------------------------------------------------------------- /07_VMap3/54_density.R: -------------------------------------------------------------------------------- 1 | setwd("/Users/guoyafei/Documents/Vmap3/density") 2 | require(RIdeogram) 3 | 4 | 5 | gene_density <- read.table("all.count.txt", header=T, stringsAsFactors = F) 6 | 7 | wheat_karyotype <- read.table("wheat_karyotype.txt", header=T, stringsAsFactors = F) 8 | ideogram(karyotype = wheat_karyotype, overlaid = gene_density) 9 | convertSVG("chromosome.svg", device = "pdf") 10 | 11 | #wheat_karyotype 12 | #Chr Start End CE_start CE_end 13 | #1 0 248956422 122026459 124932724 14 | #2 0 242193529 92188145 94090557 15 | 16 | #gene_density 17 | #Chr Start End Value 18 | #1 1 1000000 65 19 | #1 1000001 2000000 76 20 | library(ggExtra) 21 | setwd("/Users/guoyafei/Desktop/毕业论文/fig/depth") 22 | 23 | data <- read.table("v3.0.depth.DD.txt", header=T, stringsAsFactors = F) 24 | p <- ggplot() + 25 | geom_point(data = data,aes(x=mean,y=sd),alpha=0.1) + 26 | xlim(0,15)+ 27 | ylim(0,10)+ 28 | theme_bw() 29 | p1 <- ggMarginal(p, type = "density", margins = "both", size = 5, fill = "lightblue", color = 
"blue") 30 | 31 | data <- read.table("v3.2.depth.DD.txt", header=T, stringsAsFactors = F) 32 | p <- ggplot() + 33 | geom_point(data = data,aes(x=mean,y=sd),alpha=0.1) + 34 | xlim(6,14)+ 35 | ylim(3,8)+ 36 | theme_bw() 37 | p2 <- ggMarginal(p, type = "density", margins = "both", size = 5, fill = "lightblue", color = "blue") 38 | 39 | data <- read.table("v3.0.depth.AB.txt", header=T, stringsAsFactors = F) 40 | p <- ggplot() + 41 | geom_point(data = data,aes(x=mean,y=sd),alpha=0.1) + 42 | xlim(0,15)+ 43 | ylim(0,10)+ 44 | theme_bw() 45 | p3 <- ggMarginal(p, type = "density", margins = "both", size = 5, fill = "lightblue", color = "blue") 46 | 47 | data <- read.table("v3.2.depth.AB.txt", header=T, stringsAsFactors = F) 48 | p <- ggplot() + 49 | geom_point(data = data,aes(x=mean,y=sd),alpha=0.1) + 50 | xlim(4,10)+ 51 | ylim(3,8)+ 52 | theme_bw() 53 | p4 <- ggMarginal(p, type = "density", margins = "both", size = 5, fill = "lightblue", color = "blue") 54 | 55 | 56 | data <- read.table("v3.0.depth.ABD.txt", header=T, stringsAsFactors = F) 57 | p <- ggplot() + 58 | geom_point(data = data,aes(x=mean,y=sd),alpha=0.1) + 59 | xlim(0,15)+ 60 | ylim(0,10)+ 61 | theme_bw() 62 | p5 <- ggMarginal(p, type = "density", margins = "both", size = 5, fill = "lightblue", color = "blue") 63 | 64 | data <- read.table("v3.2.depth.ABD.txt", header=T, stringsAsFactors = F) 65 | p <- ggplot() + 66 | geom_point(data = data,aes(x=mean,y=sd),alpha=0.1) + 67 | xlim(5,11)+ 68 | ylim(3,7)+ 69 | theme_bw() 70 | p6 <- ggMarginal(p, type = "density", margins = "both", size = 5, fill = "lightblue", color = "blue") 71 | 72 | pdf("ori.D.pdf", width=4,height=4) 73 | print(p1) 74 | dev.off() 75 | 76 | pdf("filter.D.pdf", width=4,height=4) 77 | print(p2) 78 | dev.off() 79 | 80 | pdf("ori.AB.pdf", width=4,height=4) 81 | print(p3) 82 | dev.off() 83 | 84 | pdf("filter.AB.pdf", width=4,height=4) 85 | print(p4) 86 | dev.off() 87 | 88 | pdf("ori.ABD.pdf", width=4,height=4) 89 | print(p5) 90 | dev.off() 91 | 92 | pdf("filter.ABD.pdf", width=4,height=4) 93 | print(p6) 94 | dev.off() 95 | 96 | -------------------------------------------------------------------------------- /07_VMap3/Lulab_germplasm_Info.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yafei-Guo-coder/DumpNice/183d85aba75932aeacf7484386eb0cae484c7d9b/07_VMap3/Lulab_germplasm_Info.xlsx -------------------------------------------------------------------------------- /07_VMap3/Lulab_germplasm_Storage.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yafei-Guo-coder/DumpNice/183d85aba75932aeacf7484386eb0cae484c7d9b/07_VMap3/Lulab_germplasm_Storage.xlsx -------------------------------------------------------------------------------- /BSAseq/QTLseq_BSA.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yafei-Guo-coder/DumpNice/183d85aba75932aeacf7484386eb0cae484c7d9b/BSAseq/QTLseq_BSA.R -------------------------------------------------------------------------------- /BSAseq/gatk_step1_Index.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # building sequence alginment dictionary, samtools faidx and gatk creatSequenceDictionary 3 | #Usage: sh gatk_step1.sh /path/your_genome.fasta 4 | bwa=/usr/bin/bwa # set where to find software 5 | gatk=/mnt/zhou/hangyuan/miniconda3/bin/gatk 6 | samtools=/usr/local/bin/samtools 7 | 8 | 
#bwa index 9 | reference=$1 10 | time $bwa index "$reference" && echo "** bwa index done! ** " 11 | #samtools index 12 | time $samtools faidx $reference && echo "** samtools faidx done! ** " 13 | 14 | #注意:使用GATK之前,需要先建立参考基因组索引文件.dict和.fai 15 | #.dict中包含了基因组中contigs的名字,也就是一个字典; 16 | #.fai也就是fasta index file,索引文件,可以快速找出参考基因组的碱基,由samtools faidx构建 17 | #构建.dict文件(原来要使用picard的CreateSequenceDictionary模块,但是现在gatk整合了此模块,可以直接使用) 18 | # gatk createSequenceDictionary 19 | time $gatk --java-options "-Xmx100G -Djava.io.tmpdir=./tmp" CreateSequenceDictionary \ 20 | -R "$reference" \ 21 | -O "$reference.dict" \ 22 | && echo "** gatk createSequenceDictionary done! **" -------------------------------------------------------------------------------- /BSAseq/gatk_step2_callvar.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # this is a whole gonome gatk snp calling full process 3 | #Uage: sh gatk_step2.sh sample_1.fastq sample_2.fastq out_dictionary 4 | fastp=/usr/local/bin/fastp 5 | bwa=/usr/bin/bwa 6 | gatk=/mnt/zhou/hangyuan/miniconda3/bin/gatk 7 | samtools=/usr/local/bin/samtools 8 | reference=/mnt/zhou/hangyuan/BSA-seq/P20210613_AtBSA-raw.bam.results_20210621/01.rawData/TAIR10.fa 9 | NTHREADS=30 10 | fq1=$1 11 | fq2=$2 12 | sample=${fq1%%_*} 13 | outdir=$3 14 | outdir=${outdir}/${sample} 15 | 16 | if [ ! -d "$outdir/cleanfq" ] 17 | then mkdir -p "$outdir/cleanfq" 18 | fi 19 | 20 | if [ ! -d "$outdir/bwa" ] 21 | then mkdir -p "$outdir/bwa" 22 | fi 23 | 24 | if [ ! -d "$outdir/gatk" ] 25 | then mkdir -p "$outdir/gatk" 26 | fi 27 | 28 | time $fastp -i "$fq1" \ 29 | -I "$fq2" \ 30 | -o "$outdir/cleanfq/${sample}_1.fastq" \ 31 | -O "$outdir/cleanfq/${sample}_2.fastq" \ 32 | -h "$outdir/cleanfq/${sample}.html" \ 33 | -j "$outdir/cleanfq/${sample}.json" \ 34 | && echo '** fq QC done' 35 | time $bwa mem -M -R "@RG\\tID:${sample}\\tSM:${sample}\\tPL:ILLUMINA" \ 36 | -t $NTHREADS \ 37 | $reference \ 38 | "$outdir/cleanfq/${sample}_1.fastq" "$outdir/cleanfq/${sample}_2.fastq" \ 39 | | $samtools view -bS - > "$outdir/bwa/${sample}.bam"\ 40 | && echo '** BWA MEM done ** ' 41 | time $samtools sort \ 42 | -@ $NTHREADS \ 43 | -O bam \ 44 | -o "$outdir/bwa/${sample}.sorted.bam" \ 45 | "$outdir/bwa/${sample}.bam"\ 46 | && echo "** sorted raw bamfiles done" 47 | time $samtools index "$outdir/bwa/${sample}.sorted.bam" && echo "** ${sample}.sorted.bam index done " 48 | 49 | # 去除PCR重复 50 | time $gatk MarkDuplicates \ 51 | -I "$outdir/bwa/${sample}.sorted.bam" \ 52 | -M "$outdir/bwa/${sample}.markup_metrics.txt" \ 53 | -O "$outdir/bwa/${sample}.sorted.markup.bam" \ 54 | 1> "$outdir/bwa/${sample}.log.mark" 2>&1 \ 55 | && echo "** ${sample}.sorted.bam MarkDuplicates done **" 56 | 57 | # samtools index 去除PCR标记的bam 文件 58 | time $samtools index "$outdir/bwa/${sample}.sorted.markup.bam" \ 59 | && echo " ** ${sample}.sorted.markup.bam index done **" 60 | 61 | #gatk开始:必选 -I -O -R,代表输入、输出、参考 62 | #接下来可以按照字母顺序依次写出来,这样比较清晰 63 | #-bamout:将一整套经过gatk程序重新组装的单倍体基因型(haplotypes)输出到文件 64 | #-stand-call-conf :低于这个数字的变异位点被忽略,可以设成标准30(默认是10) 65 | time $gatk --java-options "-Xmx100G -Djava.io.tmpdir=./tmp" HaplotypeCaller \ 66 | -R $reference \ 67 | -I "$outdir/bwa/${sample}.sorted.markup.bam" \ 68 | -O "$outdir/gatk/${sample}.HC.gvcf.gz" \ 69 | --tmp-dir "$outdir/gatk"\ 70 | --emit-ref-confidence GVCF \ 71 | -stand-call-conf 10 \ 72 | && echo "** GVCF ${sample}.HC.gvcf.gz done ***" -------------------------------------------------------------------------------- 
/BSAseq/gatk_step3_gvcfmerg.sh: -------------------------------------------------------------------------------- 1 | # move the gvcf and tbi file into the directory 2 | gatk=/mnt/zhou/hangyuan/miniconda3/bin/gatk 3 | reference=/mnt/zhou/hangyuan/BSA-seq/P20210613_AtBSA-raw.bam.results_20210621/01.rawData/TAIR10.fa 4 | time $gatk CombineGVCFs \ 5 | -R $reference \ 6 | -V HR.HC.gvcf.gz \ 7 | -V NHR.HC.gvcf.gz \ 8 | -O BSA_with2pools.gvcf.gz 9 | time $gatk GenotypeGVCFs \ 10 | -R $reference \ 11 | -V "BSA_with2pools.gvcf.gz" \ 12 | -O "BSA_no_filter.HC.vcf" -------------------------------------------------------------------------------- /BSAseq/gatk_step4_filter_SNP.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | gatk=/mnt/zhou/hangyuan/miniconda3/bin/gatk 3 | reference=/mnt/zhou/hangyuan/BSA-seq/P20210613_AtBSA-raw.bam.results_20210621/01.rawData/TAIR10.fa 4 | 5 | #select SNP:提取出SNP 6 | time $gatk SelectVariants -R $reference \ 7 | -V BSA_no_filter.HC.vcf \ 8 | -select-type SNP \ 9 | -O BSA_combine_SNPs.vcf 10 | #select INDELS:提取出INDELS 11 | time $gatk SelectVariants -R $reference \ 12 | -V BSA_no_filter.HC.vcf \ 13 | -select-type INDEL \ 14 | -O BSA_combine_INDELs.vcf 15 | 16 | #To filter SNPs:过滤出高质SNPs 17 | time $gatk VariantFiltration \ 18 | -V BSA_combine_SNPs.vcf \ 19 | -filter "MQ < 40.0" \ 20 | --filter-name "MQ_filter_SNP" \ 21 | -filter "FS > 60.0" \ 22 | --filter-name "FS_filter_SNP" \ 23 | -filter "QD < 4.0" \ 24 | --filter-name "QD_filter_SNP" \ 25 | -O BSA_SNPs_filter.vcf 26 | 27 | grep -E '^#|PASS' BSA_SNPs_filter.vcf > BSA_SNPs_filterPASSED.vcf 28 | 29 | #To filter INDELS:过滤出高质量INDELS 30 | time $gatk VariantFiltration \ 31 | -V BSA_combine_INDELs.vcf \ 32 | -filter "QD < 4.0" \ 33 | --filter-name "QD_filter_INDEL" \ 34 | -filter "FS > 200.0" \ 35 | --filter-name "FS_filter_INDEL" \ 36 | -O BSA_INDELs_filter.vcf 37 | 38 | grep -E '^#|PASS' BSA_INDELs_filter.vcf > BSA_INDELs_filterPASSED.vcf 39 | 40 | #合并snp过滤结果和indel过滤结果 41 | time $gatk MergeVcfs \ 42 | -I BSA_INDELs_filterPASSED.vcf \ 43 | -I BSA_SNPs_filterPASSED.vcf \ 44 | -O BSA_filter_merged_SNP_INDEL.vcf 45 | 46 | #This is the number of sites before filtering: 47 | grep -vc "^#" BSA_combine_SNPs.vcf 48 | grep -vc "^#" BSA_combine_INDELs.vcf 49 | #This is the number of sites retained after filtering: 50 | grep -vc "^#" BSA_SNPs_filter.vcf 51 | grep -vc "^#" BSA_INDELs_filter.vcf 52 | 53 | time $gatk VariantsToTable \ 54 | -R $reference \ 55 | -V BSA_filter_merged_SNP_INDEL.vcf \ 56 | -F CHROM -F POS -F REF -F ALT -GF AD -GF DP -GF GQ -GF PL \ 57 | -O BSA.filter.table -------------------------------------------------------------------------------- /Basic_Path.sh: -------------------------------------------------------------------------------- 1 | #jar 2 | 204:/data1/home/yafei/005_Script/Jar/ 3 | #E6 VCF 4 | 204:yafei:/data2/yafei/003_Project3/Vmap1.1/E6/VCF 5 | 204:yafei:/data2/yafei/003_Project3/Vmap1.1/E6/Landrace_locate_225 6 | 204:xuebo:/data2/xuebo/Projects/Speciation/E6/Landrace_locate_225 7 | #RDA 8 | 9 | #做迁徙和环境适应性路径 10 | #RDA 11 | 204:yafei:/data2/yafei/003_Project3/Vmap1.1/E6/Landrace_locate_225/Lineages 12 | #42chr 13 | 204:yafei:/data2/yafei/003_Project3/Vmap1.1/E6 14 | 203:yafei:/data1/home/yafei/008_Software/snpEff/data2 15 | #lineages 16 | 204:yafei:/data2/yafei/003_Project3/Vmap1.1/Out_E6/VCF/VmapE6 17 | #分群 18 | 204:yafei:/data1/home/yafei/003_Project3/Structure/group 19 | #EU_group.txt North2_30_group.txt South_group.txt Tibet.txt 20 | 
#North1_group.txt North2_group.txt South_group_noTibet.txt WA_group.txt 21 | #LFMM 22 | 204@yafei /data1/home/yafei/003_Project3/Structure/LFMM/V2 23 | #XP-clr + smooth 24 | 204:xuebo:/data2/xuebo/Projects/Speciation/xpclr/Selection_V2/smooth/smooth_result/Top5% 25 | 203:xuebo:/data1/home/xuebo/Projects/Speciation/xpclr/Selection_V2 26 | 204:/data2/xuebo/Projects/Speciation/xpclr/Selection_V3 27 | #VIP gene分析 28 | 204:yafei:/data2/yafei/003_Project3/Vmap1.1/E6/Xp-CLR_V2/VIP_genes 29 | 204:yafei:/data2/yafei/003_Project3/Vmap1.1/E6/Xp-CLR_V2 30 | #VCF 变异注释分析 31 | 203:yafei:/data1/home/yafei/008_Software/snpEff/ 32 | SNPeff----52个GA通路相关基因的变异分析 33 | 参考网站:https://www.jianshu.com/p/a6e46d0c07ee(SnpEff使用方法) 34 | https://www.jianshu.com/p/f898ffc4ef48(SnpEff结果解读) 35 | fasta文件: 203: /data1/home/yafei/Project3/HaploType/Fulab/Fa/ 36 | vcf文件: 203: /data1/home/yafei/Project3/HaploType/Fulab/Vcf/ 37 | 软件位置: 203: /data1/home/yafei/Software/snpEff/ 38 | 结果文件: 203: /data1/home/yafei/Software/snpEff/data/ 39 | #XP-CLR negative contral 40 | /Users/guoyafei/Documents/01_个人项目/02_Migration/02_数据表格/01_Vmap1-1/01_Add_ZNdata/05_Environment/XP-CLR/NegativeContral 41 | #IBS with CS 42 | 203:/data2/yafei/003_project3/Project3/CS_Vmap 43 | 计算FST 44 | 203:/data2/yafei/003_project3/Project3/FST_group 45 | Fst_population 46 | 203:/data2/yafei/003_project3/Project3/FST_group/VmapData/FST_selection/Fst_pop 47 | #VMap3数据集 48 | filter1: reliable library 49 | filter2: hetThresh = 0.05; nonmissingThresh = 0.8; macThresh = 2; biallele 50 | java -Xmx300g -jar /data2/yafei/004_Vmap3/Fastcall2/03_Jar/PrivatePlantGenetics.jar 51 | 204:/data4/home/yafei/vcf_AB1/VCF/chr001_VMap3.vcf.gz 52 | 204:/data4/home/yafei/vcf_AB1/Filter2/chr001_VMap3.vcf.gz 53 | #53先导file 54 | 204:/data4/home/yafei/vcf_AB1/vcf/VCF/chr001_VMap3.vcf.gz 55 | #gff文件 56 | 204:/data2/yafei/GeneLulab1_1_LC.gff3 57 | 204:/data2/yafei/gene_v1.1_Lulab.gff3 58 | #VMap3基本统计 59 | 204:/data2/yafei/004_Vmap3/Fastcall2/02_Output/vcf_AB1/Filter2/5k.tree/structure 60 | #DEF-est 61 | 204:/data1/home/yafei/008_Software/dfe-alpha-release-2.16/Test/input/A.1fold_scaled.txt 62 | #bayenv 63 | 204:yafei:/data1/home/yafei/003_Project3/Structure/E6_Landrace_locate_225/bayenv 64 | #BAYENV 65 | 204@yafei:/data1/home/yafei/003_Project3/Structure/bayenv/ENVBAY 66 | #VMap3的group分类 67 | 204@yafei:/data4/home/yafei/vcf_AB1/Merge/Group 68 | -------------------------------------------------------------------------------- /Code/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yafei-Guo-coder/DumpNice/183d85aba75932aeacf7484386eb0cae484c7d9b/Code/.DS_Store -------------------------------------------------------------------------------- /Code/01_Density.r: -------------------------------------------------------------------------------- 1 | #install.packages('RIdeogram') 2 | require(RIdeogram) 3 | 4 | setwd("/Users/guoyafei/Documents/01_Migration/01_BasicStatistic/13_Plots/01_Density") 5 | #gene_density <- read.table("ArinaLrFor_LTR_1.txt", header=T,stringsAsFactors = F) 6 | gene_density2 <- read.table("gene_density.txt", header=T, stringsAsFactors = F) 7 | gene_density <- read.table("21-1M_VMap3_SnpDensity.txt", header=T, stringsAsFactors = F) 8 | wheat_karyotype <- read.table("wheat_karyotype.txt", header=T, stringsAsFactors = F) 9 | ideogram(karyotype = wheat_karyotype, overlaid = gene_density) 10 | convertSVG("chromosome.svg", device = "pdf") 11 | 12 | library(CMplot) 13 | 
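# (added note, not in the original) CMplot with plot.type="d" draws per-chromosome SNP density
# strips; the input needs three columns (SNP name, chromosome, position), as the commented example
# rows under read.table() below show. bin.size=1e4 (10 kb) suits the small FastCall/FastCall2 test
# regions used here; for a whole-genome SNP set a coarser bin (e.g. 1e6) is the more usual choice.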
setwd("/Users/guoyafei/Documents/01_个人项目/02_VmapIII/03_Fastcall2/测试数据") 14 | mydata<-read.table("/Users/guoyafei/Documents/01_个人项目/02_VmapIII/03_Fastcall2/测试数据/fastcall2_001_pos.txt",header=TRUE,sep="\t") 15 | head(mydata) 16 | # snp chr pos 17 | # snp1_1 1 2041 18 | # snp1_2 1 2062 19 | # snp1_3 1 2190 20 | CMplot(mydata,plot.type="d",bin.size=1e4,col=c("darkgreen","yellow", "red"),file="jpg",memo="snp_density",dpi=300) 21 | mydata<-read.table("/Users/guoyafei/Documents/01_个人项目/02_VmapIII/03_Fastcall2/测试数据/fastcall_001_pos.txt",header=TRUE,sep="\t") 22 | head(mydata) 23 | # snp chr pos 24 | # snp1_1 1 2041 25 | # snp1_2 1 2062 26 | # snp1_3 1 2190 27 | CMplot(mydata,plot.type="d",bin.size=1e4,col=c("darkgreen","yellow", "red"),file="jpg",memo="snp_density",dpi=300) -------------------------------------------------------------------------------- /Code/04_FST.r: -------------------------------------------------------------------------------- 1 | #画热图 2 | #文件目录:yafei@203:/data2/yafei/Project3/FST_group/VmapData/heatmap 3 | #A_mean.fst B_mean.fst D_mean.fst AB_mean.fst 4 | #A_weight.fst B_weight.fst D_weight.fst AB_weight.fst 5 | 6 | library("corrplot") 7 | 8 | #weighted FST 9 | 10 | #A lineage 11 | data <- read.table("A_weight.fst",header=T,stringsAsFactors=F,sep="\t") 12 | rownames(data) <- data$Alineage 13 | data<- data[,-1] 14 | data<- as.matrix(data) 15 | pdf("weight_A_fst.pdf",width=30,height=30) 16 | corrplot(data,method = "color",tl.col="black",tl.srt = 45,addrect=4,addCoef.col = "grey",number.cex=4,number.digits=3,tl.cex=2.5,cl.cex=3,cl.lim = c(0, 1)) 17 | dev.off() 18 | 19 | #B lineage 20 | data <- read.table("B_weight.fst",header=T,stringsAsFactors=F,sep="\t") 21 | rownames(data) <- data$Blineage 22 | data<- data[,-1] 23 | data<- as.matrix(data) 24 | pdf("weight_B_fst.pdf",width=30,height=30) 25 | corrplot(data,method = "color",tl.col="black",tl.srt = 45,addrect=4,addCoef.col = "grey",number.cex=4,number.digits=3,tl.cex=2.5,cl.cex=3,cl.lim = c(0, 1)) 26 | dev.off() 27 | 28 | #D lineage 29 | data <- read.table("D_weight.fst",header=T,stringsAsFactors=F,sep="\t") 30 | rownames(data) <- data$Dlineage 31 | data<- data[,-1] 32 | data<- as.matrix(data) 33 | pdf("weight_D_fst.pdf",width=30,height=30) 34 | corrplot(data,method = "color",tl.col="black",tl.srt = 45,addrect=4,addCoef.col = "grey",number.cex=4,number.digits=3,tl.cex=2.5,cl.cex=3,cl.lim = c(0, 1)) 35 | dev.off() 36 | 37 | #AB lineage 38 | data <- read.table("AB_weight.fst",header=T,stringsAsFactors=F,sep="\t") 39 | rownames(data) <- data$ABlineage 40 | data<- data[,-1] 41 | data<- as.matrix(data) 42 | pdf("weight_AB_fst.pdf",width=30,height=30) 43 | corrplot(data,method = "color",tl.col="black",tl.srt = 45,addrect=4,addCoef.col = "grey",number.cex=4,number.digits=3,tl.cex=2.5,cl.cex=3,cl.lim = c(0, 1)) 44 | dev.off() 45 | 46 | #mean FST 47 | 48 | #A lineage 49 | data <- read.table("A_mean.fst",header=T,stringsAsFactors=F,sep="\t") 50 | rownames(data) <- data$Alineage 51 | data<- data[,-1] 52 | data<- as.matrix(data) 53 | pdf("mean_A_fst.pdf",width=30,height=30) 54 | corrplot(data,method = "color",tl.col="black",tl.srt = 45,addrect=4,addCoef.col = "grey",number.cex=4,number.digits=3,tl.cex=2.5,cl.cex=3,cl.lim = c(0, 1)) 55 | dev.off() 56 | 57 | #B lineage 58 | data <- read.table("B_mean.fst",header=T,stringsAsFactors=F,sep="\t") 59 | rownames(data) <- data$Blineage 60 | data<- data[,-1] 61 | data<- as.matrix(data) 62 | pdf("mean_B_fst.pdf",width=30,height=30) 63 | corrplot(data,method = "color",tl.col="black",tl.srt = 
45,addrect=4,addCoef.col = "grey",number.cex=4,number.digits=3,tl.cex=2.5,cl.cex=3,cl.lim = c(0, 1)) 64 | dev.off() 65 | 66 | #D lineage 67 | data <- read.table("D_mean.fst",header=T,stringsAsFactors=F,sep="\t") 68 | rownames(data) <- data$Dlineage 69 | data<- data[,-1] 70 | data<- as.matrix(data) 71 | pdf("mean_D_fst.pdf",width=30,height=30) 72 | corrplot(data,method = "color",tl.col="black",tl.srt = 45,addrect=4,addCoef.col = "grey",number.cex=4,number.digits=3,tl.cex=2.5,cl.cex=3,cl.lim = c(0, 1)) 73 | dev.off() 74 | 75 | #AB lineage 76 | data <- read.table("AB_mean.fst",header=T,stringsAsFactors=F,sep="\t") 77 | rownames(data) <- data$ABlineage 78 | data<- data[,-1] 79 | data<- as.matrix(data) 80 | pdf("mean_AB_fst.pdf",width=30,height=30) 81 | corrplot(data,method = "color",tl.col="black",tl.srt = 45,addrect=4,addCoef.col = "grey",number.cex=4,number.digits=3,tl.cex=2.5,cl.cex=3,cl.lim = c(0, 1)) 82 | dev.off() 83 | -------------------------------------------------------------------------------- /Code/07_PCA.r: -------------------------------------------------------------------------------- 1 | setwd("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/02_Environment/02_XP-CLR/NegativeContral") 2 | library("FactoMineR") 3 | library("ggplot2") 4 | library("factoextra") 5 | Elevation <- read.csv("Elevation.txt",fill=TRUE, row.names = 1,na.strings = "",header = T, sep="\t", stringsAsFactors = F) 6 | Temp <- read.table("Temp.txt", header=T,row.names = 1,stringsAsFactors = F) 7 | Prec <- read.table("Prec.txt",header=T,row.names = 1,stringsAsFactors = F) 8 | bio <- read.table("20bio.txt",header=T,stringsAsFactors = F) 9 | dt <- as.matrix(scale(bio[!duplicated(bio, fromLast=TRUE),])) 10 | rm1<-cor(dt) 11 | rm1 12 | rs1<- eigen(rm1) 13 | #提取结果中的特征值,即各主成分的方差; 14 | val <- rs1$values 15 | 16 | #换算成标准差(Standard deviation); 17 | (Standard_deviation <- sqrt(val)) 18 | #计算方差贡献率和累积贡献率; 19 | (Proportion_of_Variance <- val/sum(val)) 20 | (Cumulative_Proportion <- cumsum(Proportion_of_Variance)) 21 | #碎石图绘制; 22 | par(mar=c(6,6,2,2)) 23 | plot(rs1$values,type="b", 24 | cex=2, 25 | cex.lab=2, 26 | cex.axis=2, 27 | lty=2, 28 | lwd=2, 29 | xlab = "PC", 30 | ylab="Eigenvalue (Principal Component Variance)") 31 | #提取结果中的特征向量(也称为Loadings,载荷矩阵); 32 | (U<-as.matrix(rs1$vectors)) 33 | #进行矩阵乘法,获得PC score; 34 | PC <- dt %*% U 35 | colnames(PC) <- paste("PC",1:20,sep="") 36 | head(PC) 37 | 38 | #将iris数据集的第5列数据合并进来; 39 | #df<-data.frame(PC, dt_all$ploidy, dt_all$Region) 40 | #head(df) 41 | 42 | library(ggplot2) 43 | #提取主成分的方差贡献率,生成坐标轴标题; 44 | xlab<-paste0("PC1(",round(Proportion_of_Variance[1]*100,2),"%)") 45 | ylab<-paste0("PC2(",round(Proportion_of_Variance[2]*100,2),"%)") 46 | #绘制散点图并添加置信椭圆; 47 | p1<-ggplot(data = df,aes(x=PC1,y=PC2,color=df$dt_all.ploidy))+ 48 | stat_ellipse(aes(fill=df$dt_all.ploidy), 49 | type ="norm", geom ="polygon",alpha=0.2,color=NA)+ 50 | geom_point()+labs(x=xlab,y=ylab,color="")+ 51 | guides(fill=F)+ 52 | theme_classic() 53 | p2<-ggplot(data = PC,aes(x=PC1,y=PC2))+ 54 | geom_point()+labs(x=xlab,y=ylab,color="")+ 55 | guides(fill=F)+ 56 | theme_classic() 57 | p1 58 | p2 59 | 60 | #------ 61 | dt_all$lat_bin <- NA 62 | dt_all[which(dt_all$lat >= -45 & dt_all$lat < -35),24] <- "[-45,-35)" 63 | dt_all[which(dt_all$lat >= -35 & dt_all$lat < -25),24] <- "[-35,-25)" 64 | dt_all[which(dt_all$lat >= -25 & dt_all$lat < -15),24] <- "[-25,-15)" 65 | dt_all[which(dt_all$lat >= -15 & dt_all$lat < -5),24] <- "[-15,-5)" 66 | dt_all[which(dt_all$lat >= -5 & dt_all$lat < 5),24] <- "[-5,5)" 67 | 
dt_all[which(dt_all$lat >= 5 & dt_all$lat < 15),24] <- "[5,15)" 68 | dt_all[which(dt_all$lat >= 15 & dt_all$lat < 25),24] <- "[15,25)" 69 | dt_all[which(dt_all$lat >= 25 & dt_all$lat < 35),24] <- "[25,35)" 70 | dt_all[which(dt_all$lat >= 35 & dt_all$lat < 45),24] <- "[35,45)" 71 | dt_all[which(dt_all$lat >= 45 & dt_all$lat < 55),24] <- "[45,55)" 72 | dt_all[which(dt_all$lat >= 55 & dt_all$lat < 65),24] <- "[55,65)" 73 | dt_all[which(dt_all$lat >= 65 & dt_all$lat < 75),24] <- "[65,75)" 74 | df$lat_bin <- dt_all$lat_bin 75 | p2<-ggplot(data = df,aes(x=PC1,y=PC2,color=df$lat_bin))+ 76 | stat_ellipse(aes(fill=df$dt_all.Region), 77 | type ="norm", geom ="polygon",alpha=0.2,color=NA)+ 78 | geom_point()+labs(x=xlab,y=ylab,color="")+ 79 | guides(fill=F)+ 80 | theme_classic() 81 | p1 82 | -------------------------------------------------------------------------------- /Code/09_Admixture.sh: -------------------------------------------------------------------------------- 1 | #Filter by MAF > 0.05 and missing rate < 0.1 2 | plink --vcf test.imputed.vcf --maf 0.05 --geno 0.1 --recode vcf-iid --out test.filter --allow-extra-chr --double-id --autosome-num 42 3 | #--keep-only-indels (stray leftover option, not part of the command above) 4 | #LD pruning of the VCF 5 | plink --vcf test.filter.vcf --indep-pairwise 50 10 0.2 --out test.filterLD --allow-extra-chr --double-id --autosome-num 42 6 | 7 | #Extract the pruned SNP set 8 | #plink --vcf test.filter.vcf --make-bed --extract test.filterLD.prune.in --out test.filter.prune.in --double-id --autosome-num 42 9 | plink --vcf test.filter.vcf --extract test.filterLD.prune.in --recode vcf-iid --out test.filter.prune.in --double-id --autosome-num 42 10 | 11 | #Convert to structure/admixture input formats (the --bfile calls below expect the binary fileset from the commented --make-bed command above) 12 | plink --bfile test.filter.prune.in --recode structure --out test.filter.prune.in #produces .recode.strct_in, the structure input format 13 | plink --bfile test.filter.prune.in --recode 12 --out test.filter.prune.in --autosome-num 42 #produces .ped, the admixture input format 14 | plink --bfile test.filter.prune.in --recode vcf --out test.filter.prune.in --autosome-num 42 15 | 16 | #admixture 17 | admixture --cv test.filter.ped 1 >>log.txt 18 | admixture --cv test.filter.ped 2 >>log.txt 19 | ....... 
20 | admixture --cv test.filter.ped 13 >>log.txt 21 | wait 22 | grep "CV error" log.txt > k_1to13 23 | -------------------------------------------------------------------------------- /Code/10_Xp-clr.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | sleep 4800s 3 | for i in {1..42} 4 | do 5 | WGS --model vcf --type toXPCLR --group1 ../groupTaxa/EA.txt --rec ../../slidewindow_recomrate_updown_20M.txt --chr $i --file /data2/xuebo/Projects/Speciation/E6/chr${i}.Land.vcf.gz --out groupEAChr${i} & 6 | done 7 | 8 | #!/bin/bash 9 | for i in {1..42} 10 | do 11 | XPCLR -xpclr ../groupEA/groupEAChr${i}.geno ../groupWA/groupWAChr${i}.geno ../groupWA/groupWAChr${i}.snp EA_WA_10kchr${i} -w1 0.005 500 10000 $i -p1 0.95 & 12 | done 13 | -------------------------------------------------------------------------------- /Code/14_Xp-clr_Haplotype.sh: -------------------------------------------------------------------------------- 1 | #working directory 2 | #204:yafei:/data2/yafei/003_Project3/Vmap1.1/E6/Xp-CLR_V2 3 | 4 | #extract gene_region VCF from 225 all vcf files 5 | grep -w -f 87Gene_id.txt gene_v1.1_Lulab.gff3 | awk -F";" '{print $1}' |awk '{print $1"\t"$4-5000"\t"$5+5000"\t"$9}' | awk -F"\tID=" '{print $2"\t"$1}' > 87gene_5k.txt 6 | #bash getVcf.sh 87gene_5k.txt 7 | cat $1 |while read gene chr from to 8 | do 9 | #echo $chr $from $to 10 | #if echo $2 |grep -q '.*.vcf.gz$';then 11 | # vcftools --gzvcf $2 --chr $chr --from-bp $from --to-bp $to --recode --recode-INFO-all --out $gene.$chr.$from-$to 12 | #elif echo $2 |grep -q '.*.vcf$';then 13 | vcftools --gzvcf /data2/yafei/003_Project3/Vmap1.1/E6/Landrace_locate_225/chr$chr.E6_Landrace_locate.vcf.gz --chr $chr --from-bp $from --to-bp $to --recode --recode-INFO-all --out 5k.$gene.$chr.$from-$to 14 | #fi 15 | done 16 | 17 | #传到本地用Java Migration/Haplotype将vcf转换成单倍型txt格式,使用07_VCF_Haplotype_Visual.r进行可视化。 18 | #本地路径:/Users/guoyafei/Documents/01_个人项目/02_Migration/02_数据表格/01_Vmap1-1/01_Add_ZNdata/05_Environment/XP-CLR/Gene/V2/TXT -------------------------------------------------------------------------------- /Code/15_Xp-clr_NegativeContral.r: -------------------------------------------------------------------------------- 1 | #XP-CLR negative contral 2 | library(RColorBrewer) 3 | library(ggplot2) 4 | setwd("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/02_Environment/02_XP-CLR/NegativeContral") 5 | dist <- read.table("dis_matrix.txt",header=T,stringsAsFactors = F) 6 | out <- strsplit(dist[,1], ":") 7 | dist$ID1 <- NA 8 | dist$ID2 <- NA 9 | a<-1 10 | for (i in c(1:length(out))){ 11 | dist[a,6] <- out[[i]][1] 12 | dist[a,7] <- out[[i]][2] 13 | a <- a+1 14 | } 15 | #dim(dist[which(dist$geopolitical_dis>100 & dist$TEMP_dis<10),]) 16 | WA <- read.table("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/02_Environment/02_XP-CLR/NegativeContral/group_V2/WA.txt",header =F,stringsAsFactors = F) 17 | North1 <- read.table("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/02_Environment/02_XP-CLR/NegativeContral/group_V2/North1.txt",header=F,stringsAsFactors = F) 18 | North2 <- read.table("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/02_Environment/02_XP-CLR/NegativeContral/group_V2/North2.txt",header=F,stringsAsFactors = F) 19 | #Temperature 20 | pdf("Temperature_dist.pdf",height = 10,width = 10) 21 | #WA VS North1 22 | dist$Type1 <- NA 23 | dist[which((dist$ID1 %in% WA[,1] & dist$ID2 %in% North1[,1])) ,8] <- "Yes" 24 | dist[which((dist$ID1 %in% North1[,1] & 
dist$ID2 %in% WA[,1])) ,8] <- "Yes" 25 | p <- ggplot(dist, aes(geopolitical_dis,TEMP_dis, color=Type1)) + 26 | geom_point( size=3,alpha=0.2)+ 27 | theme_classic()+ 28 | ggtitle("WA VS North1")+ 29 | theme(plot.title = element_text(color="red", size=20, face="bold.italic"),legend.text = element_text(size=20),legend.title=element_blank(),axis.text.x = element_text(size = 25), axis.title.x = element_text(size = 25),axis.text.y = element_text(size = 25),axis.title.y = element_text(size = 25)) 30 | print(p) 31 | #WA VS North2 32 | dist$Type1 <- NA 33 | dist[which((dist$ID1 %in% WA[,1] & dist$ID2 %in% North2[,1])) ,8] <- "Yes" 34 | dist[which((dist$ID1 %in% North2[,1] & dist$ID2 %in% WA[,1])) ,8] <- "Yes" 35 | 36 | p <- ggplot(dist, aes(geopolitical_dis,TEMP_dis, color=Type1)) + 37 | geom_point( size=3,alpha=0.2)+ 38 | theme_classic()+ 39 | ggtitle("WA VS North2")+ 40 | theme(plot.title = element_text(color="red", size=20, face="bold.italic"),legend.text = element_text(size=20),legend.title=element_blank(),axis.text.x = element_text(size = 25), axis.title.x = element_text(size = 25),axis.text.y = element_text(size = 25),axis.title.y = element_text(size = 25)) 41 | 42 | print(p) 43 | 44 | #North1 VS North2 45 | dist$Type1 <- NA 46 | dist[which((dist$ID1 %in% North2[,1] & dist$ID2 %in% North1[,1])),8] <- "Yes" 47 | dist[which((dist$ID1 %in% North1[,1] & dist$ID2 %in% North2[,1])),8] <- "Yes" 48 | 49 | p <- ggplot(dist, aes(geopolitical_dis,TEMP_dis, color=Type1)) + 50 | geom_point( size=3,alpha=0.2)+ 51 | theme_classic()+ 52 | ggtitle("North1 VS North2")+ 53 | theme(plot.title = element_text(color="red", size=20, face="bold.italic"),legend.text = element_text(size=20),legend.title=element_blank(),axis.text.x = element_text(size = 25), axis.title.x = element_text(size = 25),axis.text.y = element_text(size = 25),axis.title.y = element_text(size = 25)) 54 | print(p) 55 | dev.off() 56 | -------------------------------------------------------------------------------- /Code/17_Sample_Cluster.r: -------------------------------------------------------------------------------- 1 | library(cluster) 2 | library(factoextra) 3 | #Clustering 4 | #Cluster samples by latitude and longitude 5 | #data <- read.table("land.txt", header=T, sep="\t", stringsAsFactors = F) 6 | #dataA <- data[,c(3,4)] 7 | #data2 <- dataA[!is.na(dataA$Latitude),] 8 | 9 | data <- read.table("/Users/guoyafei/Documents/01_Migration/02_Environment/04_bayenv/225env.txt", header=F,stringsAsFactors = F) 10 | colname <- c("elevation","temp1","temp2","temp3","temp4","temp5","temp6","temp7","temp8","temp9","temp10","temp11","prec1","prec2","prec3","prec4","prec5","prec6","prec7","prec8","Latitude","Logititude") 11 | rownames(data) <- data[,1] 12 | data2 <- data[!is.na(data$V6),-1] 13 | colnames(data2) <- colname 14 | df = scale(data2,center = T,scale = T) 15 | colnames(df) <- colname 16 | #Standardize column by column (centre and scale) 17 | #First compute pairwise dissimilarities between samples 18 | result <- dist(df, method = "euclidean") 19 | #Uses the specified metric to compute distances between the rows of the data matrix 20 | #euclidean: Euclidean distance 21 | #maximum: maximum distance 22 | #manhattan: absolute (city-block) distance 23 | #canberra: Canberra distance 24 | #minkowski: Minkowski distance 25 | #Build the hierarchical tree 26 | result_hc <- hclust(d = result, method = "ward.D2") 27 | #Ward: the minimum-variance method aims for compact, spherical clusters; the complete-linkage method finds similar clusters. 28 | #Two algorithms are available: "ward.D" (the old "ward" option, which does not implement Ward's (1963) clustering criterion) and "ward.D2" (Murtagh & Legendre 2014), which squares the dissimilarities before updating the clusters. Note that agnes(*, method="ward") corresponds to hclust(*, "ward.D2"). 
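# (added sketch, not in the original) Before fixing k=30 in cutree() below, a quick sanity check on
# the number of clusters is the silhouette profile from factoextra (already loaded above); df is the
# scaled variable matrix built earlier in this script and k.max=40 is an arbitrary upper bound:
fviz_nbclust(df, FUNcluster = hcut, method = "silhouette", k.max = 40)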
29 | #Methods median and centroid do not lead to a monotone distance measure; equivalently, the resulting dendrograms can show so-called inversions or reversals. 30 | 31 | data2$type <- cutree(result_hc, k=30) 32 | lat_mean <- tapply(data2[,21],data2$type,mean,na.rm = TRUE) 33 | lon_mean <- tapply(data2[,22],data2$type,mean,na.rm = TRUE) 34 | data2$cluster1 <- NA 35 | data2$cluster2 <- NA 36 | for(i in 1:224) { 37 | for(j in 1:30){ 38 | if(data2[i,23] == j ){ 39 | data2[i,24] <- as.numeric(lat_mean[j]) 40 | data2[i,25] <- as.numeric(lon_mean[j]) 41 | } 42 | } 43 | } 44 | write.table(data2,"30_cluster.txt",sep="\t",row.names = F,quote=F) 45 | 46 | library(maps) 47 | mp<-NULL 48 | mapworld<-borders("world",colour = "gray70",fill="gray70") 49 | mp<-ggplot()+mapworld+ylim(-90,90) 50 | mp_40<-mp+geom_point(aes(x=re$Longitude_40, y=re$Latitude_40))+ 51 | scale_size(range=c(1,1))+ 52 | theme_classic() -------------------------------------------------------------------------------- /Code/19_Qmatrix_PieMap.r: -------------------------------------------------------------------------------- 1 | #Global settings: annotation content and colours 2 | library(pheatmap) 3 | require(reshape) 4 | require (rworldmap) 5 | require(rworldxtra) 6 | library(RColorBrewer) 7 | setwd("/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Gene/V5") 8 | annotation_col <- read.table("/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Taxa_Region_225.txt",header=T,stringsAsFactors = F) 9 | rownames(annotation_col) = c(1:325) 10 | seq <- annotation_col[,1] 11 | #cluster samples: grouped by the regions used for the XP-CLR comparisons 12 | out <- annotation_col[,c(4,5,9)] 13 | lat_mean <- tapply(out[,1],out$Region_sub2,mean,na.rm = TRUE) 14 | lon_mean <- tapply(out[,2],out$Region_sub2,mean,na.rm = TRUE) 15 | out$cluster1 <- NA 16 | out$cluster2 <- NA 17 | for(i in 1:225) { 18 | for(j in names(lat_mean)){ 19 | if(out[i,3] == j ){ 20 | out[i,4] <- as.numeric(lat_mean[j]) 21 | out[i,5] <- as.numeric(lon_mean[j]) 22 | } 23 | } 24 | } 25 | mode <- as.data.frame(cbind(annotation_col[,2],as.numeric(out[,4]),as.numeric(out[,5])),stringsAsFactors = F) 26 | mode$V2 <- as.numeric(mode$V2) 27 | mode$V3 <- as.numeric(mode$V3) 28 | #map 29 | path <- "/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Gene/V5/TXT" ##input directory 30 | fileNames <- dir(path) ##file names under this path 31 | filePath <- sapply(fileNames, function(x){ 32 | paste(path,x,sep='/')}) ##build full file paths for reading 33 | data <- lapply(filePath, function(x){ 34 | read.table(x, header=F,stringsAsFactors = F)}) 35 | pdf("cut_8_piemap.pdf") 36 | for (i in c(1:82)){ 37 | all <- as.matrix(data[[i]]) 38 | colnames(all) <- c(1:225) 39 | all <- all[,seq] 40 | data.e <- dist(t(all)) 41 | model <- hclust(data.e,method = "complete") 42 | #plot(model) 43 | result <- as.numeric(cutree(model, k=8)) 44 | out2 <- as.data.frame(cbind(mode,result)) 45 | out2$value <- 1 46 | colnames(out2)[1:5] <- c("type","Latitude", "Logititude", "Sub.population.2", "value") 47 | wheat2 = out2[!is.na(out2$Latitude),] 48 | wheat = wheat2[!is.na(wheat2$Sub.population.2),] 49 | wheat_reshape <- cast(wheat,Latitude+Logititude~Sub.population.2) 50 | wheat_reshape2 <- as.data.frame(wheat_reshape) 51 | mapPies(wheat_reshape2,xlim=c(-120,140),ylim=c(0,40),nameX="Logititude",nameY="Latitude",nameZs=c("1","2","3","4","5","6","7","8"),symbolSize=1, 52 | zColours=brewer.pal(8, "Set2"),barOrient='vert',oceanCol="#D1EEEE",landCol="#FFDAB9",main=i) 53 | } 54 | dev.off() -------------------------------------------------------------------------------- /Code/20_Qmatrix_Visual.r: -------------------------------------------------------------------------------- 1 | #Global settings: annotation content and colours 2 | library(pheatmap) 3 
| require(reshape) 4 | require (rworldmap) 5 | require(rworldxtra) 6 | library(pophelper) 7 | library(ggplot2) 8 | require(gridExtra) 9 | setwd("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/01_BasicStatistic/06_Structure/V1/Select") 10 | #load Qmatrix files 11 | sfiles <- list.files(path="/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/01_BasicStatistic/06_Structure/V1/Select/QMatrix", full.names=T) 12 | slist <- readQ(files=sfiles) 13 | tabulateQ(qlist=readQ(sfiles)) 14 | summariseQ(tabulateQ(qlist=readQ(sfiles))) 15 | 16 | pdf("Q.pdf",width = 24,height = 8) 17 | p1 <- plotQ(slist[1:7],returnplot=T,exportplot=F,quiet=T,basesize=11, 18 | sortind="all",showindlab=F,showyaxis=T,showticks=T,sharedindlab=T,clustercol=brewer.pal(7, "Set2")) 19 | grid.arrange(p1$plot[[1]],p1$plot[[2]],p1$plot[[3]],p1$plot[[4]],p1$plot[[5]],nrow=5) 20 | dev.off() 21 | -------------------------------------------------------------------------------- /Code/21_PCA_climate.r: -------------------------------------------------------------------------------- 1 | #working directory 2 | #/Users/guoyafei/Documents/01_个人项目/02_Migration/02_数据表格/01_Vmap1-1/01_Add_ZNdata/05_Environment/XP-CLR/NegativeContral 3 | library(RColorBrewer) 4 | library(ggmap) 5 | setwd("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/02_Environment/02_XP-CLR/NegativeContral") 6 | location <- read.table("/Users/guoyafei/Documents/01_个人项目/01_Migration/02_Add_ZNdata/01_BasicStatistic/01_IBS/04_795taxaIBS/795_Location.txt",header=T,stringsAsFactors = F) 7 | 8 | Elevation <- read.table("Elevation.txt",header=T,stringsAsFactors = F) 9 | ele <- merge(Elevation,location,by="ID") 10 | colnames(ele)[2] <-"PC1" 11 | 12 | Temp <- read.table("temp_PC.txt",header=T,stringsAsFactors = F) 13 | Temp$ID <- rownames(Temp) 14 | tem <- merge(Temp,location,by="ID") 15 | 16 | Prec <- read.table("Prec_PC.txt",header=T,stringsAsFactors = F) 17 | Prec$ID <- rownames(Prec) 18 | pre <- merge(Prec,location,by="ID") 19 | 20 | bio <- read.table("20bio_PC.txt",header=T,row.names = 1) 21 | #bio$ID <- rownames(bio) 22 | #bio <- merge(bio,location,by="ID") 23 | 24 | data <- list(ele,tem,pre) 25 | pdf("20bio_PC1.pdf",width = 12,height = 7.5) 26 | for(i in c(1:2)){ 27 | mp <- NULL 28 | mapworld <- borders("world",colour = "grey90",fill="white") 29 | mp <- ggplot()+mapworld+ylim(-60,90)+xlim(-25,200) 30 | mp2 <- mp+geom_point(aes(x=bio$Logititude,y=bio$Latitude,color=bio$PC1),size=2.6)+scale_size(range=c(1,1))+ 31 | scale_colour_gradientn(colours = brewer.pal(11, "RdYlBu"))+ 32 | theme(legend.title=element_blank(),axis.text.x = element_text(size = 25), axis.title.x = element_text(size = 25),axis.text.y = element_text(size = 25),axis.title.y = element_text(size = 25))+ 33 | theme(panel.grid = element_blank()) + 34 | #theme(panel.grid =element_blank()) + ## 删去网格线 35 | #theme(axis.text = element_blank()) + ## 删去刻度标签 36 | #theme(axis.ticks = element_blank()) + ## 删去刻度线 37 | theme(panel.border = element_blank()) 38 | print(mp2) 39 | } 40 | dev.off() 41 | 42 | 43 | #看筛选样本的分布位点 44 | #taxa <- read.table("group_V2/max_temp.txt",header=F,stringsAsFactors = F) 45 | #colnames(taxa)[1] <-"ID" 46 | #max <- merge(taxa,location,by="ID") 47 | 48 | -------------------------------------------------------------------------------- /Code/22_VIP_Gene_XpCLR_signal.r: -------------------------------------------------------------------------------- 1 | library(qqman) 2 | #画信号位点基因的XP-CLR的染色体信号 3 | #Working directory 4 | 
#xuebo@204:/data2/xuebo/Projects/Speciation/xpclr/Selection_V3/smooth/lineage_V2/Top5%/VIP_gene_XpCLR 5 | #VIP_gene 6 | #1. TraesCS4D02G364400 Vrn2-2 24 58277633 58279728 chr4D:509282253-509284348(-) Transcription factor GHD7 [UniProtKB/Swiss-Prot:E5RQA1] Os10g0560400 NA 7 | #2. TraesCS2D02G079600 Ppd-1(PRR) 11 33952048 33956269 chr2D:33952048-33956269(-) Two-component response regulator-like PRR37 [UniProtKB/Swiss-Prot:A2YQ93] Os07g0695100 NA 8 | #3. TraesCS2A02G081900 Ppd-A1 7 36933684 36938202 chr2A:36933684-36938202(-) Two-component response regulator-like PRR37 [UniProtKB/Swiss-Prot:A2YQ93] Os07g0695100 NA 9 | #4. TraesCS5A02G473800 Q-5A 26 196896739 196900381 chr5A:650127258-650130900(-) APETALA2-like protein 2 [UniProtKB/Swiss-Prot:Q84TB5] Os03g0818800 NA 10 | #5. TraesCS4B02G043100 Rht-B1 21 30861268 30863723 chr4B:30861268-30863723(+) DELLA protein RHT-1 [UniProtKB/Swiss-Prot:Q9ST59] Os03g0707600 AT1G66350 11 | #6. TraesCS1D02G029100 Sr33 5 11451423 11459353 chr1D:11451423-11459353(+) Disease resistance protein RGA5 [UniProtKB/Swiss-Prot:F7J0N2] NA AT3G46530 12 | #7. TraesCS1D02G040400 Sr45 5 19341296 19346065 chr1D:19341296-19346065(+) Putative disease resistance protein RGA4 [UniProtKB/Swiss-Prot:Q7XA39] Os10g0130800 N 13 | 14 | #File Gene_Name Chr Start Stop 15 | #North2_South_smooth_A Ppd-A1 7 36933684 36938202 16 | #North2_South_smooth_B Rht-B1 21 30861268 30863723 17 | #North2_South_smooth_D Sr45 5 19341296 19346065 18 | #WA_EU_smooth_A Q-5A 26 196896739 196900381 19 | #WA_EU_smooth_D Sr45 5 19341296 19346065 20 | #EU_South_smooth_B Rht-B1 21 30861268 30863723 21 | #EU_South_smooth_D Flowering1 24 58277633 58279728 22 | #EU_South_smooth_D Sr45 5 19341296 19346065 23 | #WA_South_smooth_D Flowering1 24 58277633 58279728 24 | #WA_South_smooth_D Sr33 5 11451423 11459353 25 | #Tibet_South_smooth_D Ppd-1(PRR) 11 33952048 33956269 26 | 27 | cat $1 |while read file chr1 chr2 28 | do 29 | awk '{if($1== "'${chr1}'" || $1=="'${chr2}'") {print $0}}' ../../${file}| sort -k1,1n -k2,2g |sed '1i Chr\tWindowStart\tWindowStop\tSNPcount\tMeanY\tWstat' > ${file::-4}.${chr1}.${chr2}.txt 30 | done 31 | 32 | #把42条染色体合并成21条 33 | #转移到本地画图 34 | #/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Xpclr/V3 35 | setwd("/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Xpclr/V3") 36 | #批量读取Xp-clr结果文件 37 | path <- "/Users/guoyafei/Documents/01_Migration/02_Environment/02_XP-CLR/Xpclr/V3/TXT" ##文件目录 38 | fileNames <- dir(path) ##获取该路径下的文件名 39 | filePath <- sapply(fileNames, function(x){ 40 | paste(path,x,sep='/')}) ##生成读取文件路径 41 | data <- lapply(filePath, function(x){ 42 | read.table(x, header=T,stringsAsFactors = F)}) 43 | 44 | #读取阈值文件 45 | thresHold <- read.table("thresHold.txt",header=T,stringsAsFactors = F) 46 | rownames(thresHold) <- thresHold$GEO_region 47 | #读取着丝粒文件 48 | centromer <- read.table("centromerer.txt",header=T,stringsAsFactors = F) 49 | rownames(centromer) <- centromer$chr 50 | 51 | #读取VIP基因文件 52 | gene <- read.table("VIP_gene.txt",header=T,stringsAsFactors = F) 53 | rownames(gene) <- paste(gene$File,gene$Position,sep=".") 54 | out1 <- strsplit(sub('.chr',':chr', names(data)), ":") 55 | out2 <- strsplit(sub('.txt',':txt', names(data)), ":") 56 | 57 | pdf("Xp-clr.pdf",width = 10,height = 5) 58 | for(i in c(1:length(data))){ 59 | name <- out1[[i]][1] 60 | tit <- out2[[i]][1] 61 | chr <- strsplit(out1[[i]][2], ".txt")[[1]][1] 62 | title <- paste(gene[tit,1],gene[tit,2],sep=":") 63 | cen <- centromer[chr,4] 64 | plot(data[[i]]$WindowStart/1000000, data[[i]]$MeanY, 
ylim = c(-2,50),axes = T,col = "skyblue",type="l",cex = 2,lwd = 3,ann = F) 65 | abline(v = gene[tit,7]/1000000, col = "red", cex=2,lwd = 1) 66 | abline(h=thresHold[name,3], col = "red", lty = 3,cex=2,lwd = 2) 67 | title(title,xlab= chr, ylab = 'Xp-clr', font.lab = 1, cex.lab = 1.5) 68 | points(cen/1000000,0, pch=20,cex=2,col="grey") 69 | } 70 | dev.off() 71 | -------------------------------------------------------------------------------- /Code/23_Gene_HaploType_Map.r: -------------------------------------------------------------------------------- 1 | #work directory: 2 | #/Users/guoyafei/Documents/01_Migration/01_BasicStatistic/06_Structure/sample_group.txt 3 | #/data1/home/yafei/003_Project3/bayenv/13pop/candidate_gene 4 | #/data2/yafei/003_Project3/Vmap1.1/E6/Landrace_locate_225 5 | #Rht-B1: 6 | #TraesCS4B02G043100 7 | #chr21 start:30861268 end:30863723 8 | #ppd-D1 9 | #TraesCS2D01G079600 10 | #chr11 start:33952048 end:33956269 11 | cat *cloned.gene | sort | uniq -c | awk '{print $2"\t"$3}' > top5.candidate.txt 12 | awk '{print $2}' top5.candidate.txt > test 13 | grep -f test gene_v1.1_Lulab.gff3 |awk '{print $1"\t"$4"\t"$5"\t"$9}' |awk -F";" '{print $1}' |sed 's/ID=//' > test2 14 | awk 'NR==FNR{a[$2]=$1;b[$2]=$2} NR!=FNR {if($4 in b) {print $0"\t"a[$4]}}' top5.candidate.txt test2 | awk '{print $1"\t"$2-5000"\t"$3+5000"\t"$4"\t"$5}' |sed '1i Chr\tstart-5k\tend+5k\tID\tName' > gene_region.txt 15 | 16 | cat gene_region.txt |while read chr from to ID Name 17 | do 18 | if [ $chr != "Chr" ];then 19 | bcftools filter /data2/yafei/003_Project3/Vmap1.1/E6/Landrace_locate_225/chr${chr}.E6_Landrace_locate.vcf.gz --regions ${chr}:${from}-${to} > ${ID}-${Name}.vcf 20 | fi 21 | done 22 | for i in `ls *vcf` 23 | do 24 | 25 | vcftools --vcf $i --012 26 | paste out.012.pos <(cat out.012 | datamash transpose | sed '1d' ) > ${i::-4}.vcf.txt 27 | cat out.012.indv | datamash transpose | awk '{print "File\tChr\tPos\t"$0}' > indi.txt 28 | rm out.012* 29 | done 30 | for i in `ls *vcf.txt` 31 | do 32 | awk '{print FILENAME"\t"$0}' $i | sed 's/.vcf.txt//g' > ${i::-4}.vcf.txt2 33 | done 34 | cat indi.txt *vcf.txt2 > all.pos.txt 35 | rm TraesCS* 36 | 37 | library(reshape) 38 | library(ggplot2) 39 | library(RColorBrewer) 40 | require (rworldmap) 41 | setwd("/Users/guoyafei/Documents/01_Migration/01_BasicStatistic/06_Structure/") 42 | data <- read.table("/Users/guoyafei/Documents/01_Migration/02_Environment/01_RDA_plot/225.taxa.txt", header=T,row.names = 1, sep="\t", stringsAsFactors = F) 43 | taxa <- read.table("/Users/guoyafei/Documents/01_Migration/01_BasicStatistic/06_Structure/20210829/Cluster_Location/cluster_70.txt",header=T,row.names = 1, stringsAsFactors = F) 44 | for(i in names(table(data$File))){ 45 | sub <- data[which(data$File==i),] 46 | file <- paste(i,".pdf",sep="") 47 | pdf(file) 48 | for( j in c(1:dim(sub)[1])){ 49 | tit <- sub[j,3] 50 | sample <- sub[j,4:228] 51 | loc <- taxa[names(sample),4:5] 52 | loc$type <- as.numeric(sample[1,]) 53 | loc$value <- 1 54 | wheat_reshape <- cast(loc,Latitude_70+Longitude_70~type) 55 | wheat_reshape2 <- as.data.frame(wheat_reshape) 56 | name <- names(table(loc$type)) 57 | if((j-1)%%4 !=0){ 58 | mapPies(wheat_reshape2,xlim=c(-10,130),ylim=c(10,70),nameX="Longitude_70",nameY="Latitude_70",nameZs=name,symbolSize=1.5, 59 | zColours=brewer.pal(8, "Set3")[c(2,3,4,5)],barOrient='vert',oceanCol="white",landCol=brewer.pal(9,"Pastel1")[9],main="tit") 60 | legend(25,95,box.lty=0,bg="transparent","108 landrace GrowHabbit", col="black") 61 | }else{ 62 | 
      par(mfrow=c(2,2),oma=c(1,1,1,1), mar=c(0,1,0,1), cex=1)
      mapPies(wheat_reshape2,xlim=c(-10,130),ylim=c(10,70),nameX="Longitude_70",nameY="Latitude_70",nameZs=name,symbolSize=1.5,
              zColours=brewer.pal(12, "Set3")[c(2,3,4,5)],barOrient='vert',oceanCol="white",landCol=brewer.pal(9,"Pastel1")[9], main=tit)
      legend(25,95,box.lty=0,bg="transparent",tit, col="black")
    }
  }
  dev.off()
}
--------------------------------------------------------------------------------
/Code/readme.txt:
--------------------------------------------------------------------------------
01:Plot chromosome density map
02:Calculate IBS distance
03:Calculate population genetic diversity
04:Calculate the fixation index (FST)
05:Plot the genetic drift and genetic diversity of each population
06:MDS analysis
07:Genotype PCA
08:Extract geo-environmental climate variables
09:Population structure analysis
10:Xp-clr analysis
11:Xp-clr smooth
12:Xp-clr signal regions
13:Xp-clr with bayenv
14:Xp-clr haplotype
15:Xp-clr negative control
16:RDA analysis
17:Sample location cluster analysis
18:Haplotype map
19:Plot the haplotype geographic distribution map
20:Population structure visualization
21:Climate PCA
22:Candidate gene XP-CLR signal visualization
23:Candidate gene haplotype map
24:Candidate gene haplotype visualization
25:Candidate gene snpEff
26:Environmental GWAS
27:PPD gene analysis
28:Bayenv analysis
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Codes for Vmap1.1 and Vmap3
### Statistics and Graphics drawing

PI.r: Calculate population genetic diversity

FST.r: Calculate the fixation index (FST)

IBS.r: Calculate IBS distance

Daf.r: Calculate derived allele frequency

Maf.r: Calculate minor allele frequency

AdmixtureQ.r: Population structure analysis

ChrBin.r: Draw a chromosome diagram

Cluster.r: Sample location cluster analysis

ExtractMap.r: Extract geo-environmental climate variables

LDjump.r: Linkage disequilibrium analysis

Migration.r: Visualize spatial population structure from geo-referenced genetic samples

Pheatmap.r: Haplotype map

Plot_Density.r: Plot chromosome density map

Plot_Drift_Pi.r: Plot the genetic drift and genetic diversity of each population

Plot_Fst.r: Draw heat maps of the fixation index (FST) between subspecies

Plot_Haplotype.r: Draw the haplotype map after population structure analysis

Plot_IBS.r: Draw heat maps of IBS distance for each population

Plot_MapPie.r: Plot the haplotype geographic distribution map

Plot_Pi.r: Plot the genetic diversity of each population

Enrich.r: Gene function enrichment visualization

Manhattan.r: Draw a Manhattan plot
--------------------------------------------------------------------------------
/R_Code.Rproj:
--------------------------------------------------------------------------------
Version: 1.0

RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX
--------------------------------------------------------------------------------