├── Images
    ├── 1.admixture_barplot.png
    ├── 2.pie_charts_map.png
    └── 3.Admixture_bar_map.png
├── README.md
├── TJ_genind2structure_function.R
├── coordinates.csv
├── lobster_1278ind_79snps_40pop.RData
└── pie_chart_admixture_map_tutorial.R


/Images/1.admixture_barplot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tom-Jenkins/admixture_pie_chart_map_tutorial/b4c44c1bd3dbf546b425b90b603a5d795b3f5a9e/Images/1.admixture_barplot.png


--------------------------------------------------------------------------------
/Images/2.pie_charts_map.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tom-Jenkins/admixture_pie_chart_map_tutorial/b4c44c1bd3dbf546b425b90b603a5d795b3f5a9e/Images/2.pie_charts_map.png


--------------------------------------------------------------------------------
/Images/3.Admixture_bar_map.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tom-Jenkins/admixture_pie_chart_map_tutorial/b4c44c1bd3dbf546b425b90b603a5d795b3f5a9e/Images/3.Admixture_bar_map.png


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Visualising admixture on a map using R
2 | The scripts in this repository cover how to conduct STRUCTURE-like analyses in R using the R package LEA, how to visualise admixture proportions at both the individual and the population level, and how to plot admixture data as pie charts on a map. 
3 | 
4 | <img src="Images/3.Admixture_bar_map.png" width="1000" height="600" >
5 | 


--------------------------------------------------------------------------------
/TJ_genind2structure_function.R:
--------------------------------------------------------------------------------
  1 | #======================================
  2 | # R function to export a genind object in STRUCTURE format
  3 | # 
  4 | # Tom Jenkins t.l.jenkins@exeter.ac.uk
  5 | #
  6 | # July 2018
  7 | #
  8 | #======================================
  9 | 
 10 | # data: genind object
 11 | # file: file name to write
 12 | # pops: whether to include population info
 13 | # markers: whether to include marker info
 14 | # unix: export as a unix text file (windows text file default)
 15 | # Function is flexible with regards to ploidy, although genotypes are
 16 | # considered to be unambiguous.
 17 | # Missing data must be recorded as NA.
 18 | # SNP data must be biallelic with all alleles present.
 19 | 
 20 | # Example use: 
 21 | # library(adegenet)
 22 | # ind = as.character(paste("ind_", seq(1:100), sep=""))
 23 | # pop = as.character(c(rep("pop1",25), rep("pop2",25), rep("pop3",25), rep("pop4",25)))
 24 | # loci = list(c("AA","AC","CC"), c("GG","GC","CC"), c("TT","TA","AA"), c("CC","CT","TT"))
 25 | # loci = sample(loci, 100, replace=T)
 26 | # loci = lapply(loci, sample, size=100, replace=TRUE)
 27 | # geno = as.data.frame(loci, col.names= .genlab("loc",100))
 28 | # data = df2genind(geno, ploidy=2, ind.names=ind, pop=pop, sep="")
 29 | # genind2structure(data, file="example_structure.str")
 30 | 
 31 | # Convert Windows text file to Unix text file in Linux
 32 | # awk '{ sub("\r$", ""); print }' winfile.txt > unixfile.txt
 33 | 
 34 | 
 35 | genind2structure = function(data, file="", pops=TRUE, markers=TRUE, unix=FALSE){
 36 |   
 37 |   ## Check input file a genind object
 38 |   if(!"genind" %in% class(data)){
 39 |     warning("Function was designed for genind objects.")
 40 |   }
 41 |   
 42 |   ## Check adegenet, miscTools and stringr are installed
 43 |   if(!require(adegenet)){install.packages("adegenet")}
 44 |   if(!require(miscTools)){install.packages("miscTools")}
 45 |   if(!require(stringr)){install.packages("stringr")}
 46 | 
 47 |   
 48 |   # ---------------- #
 49 |   #
 50 |   # Preamble
 51 |   #
 52 |   # ---------------- #
 53 |   
 54 |   ## Ploidy
 55 |   ploid = max(data$ploidy)
 56 |   
 57 |   ## Number of individuals
 58 |   ind = nInd(data)
 59 |   
 60 |   ## Create dataframe containing individual labels
 61 |   ## Number of duplicated labels depends on the ploidy of the dataset
 62 |   df = data.frame(ind = rep(indNames(data), each=ploid))
 63 |   
 64 |   ## Locus names
 65 |   loci = locNames(data)
 66 |   
 67 |   
 68 |   # ---------------- #
 69 |   #
 70 |   # Population IDs
 71 |   #
 72 |   # ---------------- #
 73 |   
 74 |   if(pops){
 75 |     
 76 |   ## Create dataframe containing individual labels
 77 |   ## Number of duplicated labels depends on the ploidy of the dataset
 78 |   df.pop = data.frame(pop = rep(as.numeric(data$pop), each=ploid))
 79 |   
 80 |   ## Add population IDs to dataframe
 81 |   df$Pop = df.pop$pop
 82 |   }
 83 |   
 84 |   # ---------------- #
 85 |   #
 86 |   # Process genotypes
 87 |   #
 88 |   # ---------------- #
 89 |   
 90 |   ## Add columns for genotypes
 91 |   df = cbind(df, matrix(-9, # -9 codes for missing data
 92 |                         nrow=dim(df)[1],
 93 |                         ncol=nLoc(data),
 94 |                         dimnames=list(NULL,loci)))
 95 |   
 96 |   ## Loop through dataset to extract genotypes
 97 |   for(L in loci){
 98 |     thesedata = data$tab[, grep(paste("^", L, "\\.", sep=""), dimnames(data$tab)[[2]])] # dataotypes by locus
 99 |     al = 1:dim(thesedata)[2] # numbered alleles
100 |     for(s in 1:ind){
101 |       if(all(!is.na(thesedata[s,]))){
102 |         tabrows = (1:dim(df)[1])[df[[1]] == indNames(data)[s]] # index of rows in output to write to
103 |         tabrows = tabrows[1:sum(thesedata[s,])] # subset if this is lower ploidy than max ploidy
104 |         df[tabrows,L] = rep(al, times = thesedata[s,])
105 |       }
106 |     }
107 |   }
108 |   
109 |   
110 |   # ---------------- #
111 |   #
112 |   # Marker IDs
113 |   #
114 |   # ---------------- #
115 |   
116 |   if(markers){
117 |   
118 |   ## Add a row at the top containing loci names
119 |   df = as.data.frame(insertRow(as.matrix(df), 1, c(loci,"","")))
120 |   
121 |   }
122 |   
123 |   # ---------------- #
124 |   #
125 |   # Export file
126 |   #
127 |   # ---------------- #
128 |   
129 |   ## Export dataframe
130 |   write.table(df, file=file, sep="\t", quote=FALSE,
131 |               row.names=FALSE, col.names=FALSE)
132 |   
133 |   ## Export dataframe as unix text file
134 |   if(unix){
135 |     output_file = file(file, open="wb")
136 |     write.table(df, file=output_file, sep="\t", quote=FALSE,
137 |                 row.names=FALSE, col.names=FALSE)
138 |     close(output_file)
139 |   }
140 |   
141 | }
142 |   
143 | 
144 | 


--------------------------------------------------------------------------------
/coordinates.csv:
--------------------------------------------------------------------------------
 1 | Code,Lat,Lon
 2 | Ale,40.84,25.87
 3 | Ber,60.65,4.77
 4 | Brd,54.08,-0.17
 5 | Cor,51.84,-8.26
 6 | Cro,52.94,1.31
 7 | Eye,55.88,-2.07
 8 | Flo,58.42,8.76
 9 | Gul,58.25,11.33
10 | Heb,57.79,-7.25
11 | Hel,54.18,7.9
12 | Hoo,52.12,-6.92
13 | Idr16,46.13,-1.250001
14 | Idr17,46.13,-1.250001
15 | Iom,54.12,-4.5
16 | Ios,49.92,-6.33
17 | Jer,49.16,-2.12
18 | Kav,58.33,11.37
19 | Kil,53.28,-9.77
20 | Laz,41.44,12.62
21 | Loo,50.35,-4.44
22 | Lyn,52.93,-4.62
23 | Lys,58.26,11.37
24 | Mul,54.19,-10.15
25 | Oos,51.61,3.7
26 | Ork,59,-2.83
27 | Pad,50.56,-4.98
28 | Pem,51.81,-5.29
29 | Sar13,41.26,9.2
30 | Sar17,41.26,9.2
31 | Sbs,50.82,-0.26
32 | She,60.17,-1.4
33 | Sin,59.08,11.12
34 | Sky,38.82,24.53
35 | Sul,59.09,-6.16
36 | Tar,42.23,11.68
37 | The,40.36,22.88
38 | Tor,40.17,23.54
39 | Tro,63.76,9.15
40 | Ven,52.12,-10.35
41 | Vig,42.49,-8.99
42 | 


--------------------------------------------------------------------------------
/lobster_1278ind_79snps_40pop.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tom-Jenkins/admixture_pie_chart_map_tutorial/b4c44c1bd3dbf546b425b90b603a5d795b3f5a9e/lobster_1278ind_79snps_40pop.RData


--------------------------------------------------------------------------------
/pie_chart_admixture_map_tutorial.R:
--------------------------------------------------------------------------------
  1 | # --------------------------- #
  2 | #
  3 | # Tutorial: 
  4 | # Admixture > Pie charts > Map
  5 | #
  6 | # Description:
  7 | # Tutorial on how to run STRUCTURE-like analyses in R, plot admixture proportions
  8 | # using pie charts, and visualise the pie charts on a map.
  9 | # 
 10 | # Data:
 11 | # European lobster SNP genotypes generated by a Fluidigm EP1 system.
 12 | # Tutorial uses ten sites from the original study (Jenkins et al. 2019).
 13 | # Data can be downloaded from the link below:
 14 | # https://doi.org/10.5061/dryad.2v1kr38
 15 | # "lobster_1278ind_79snps_40pop.RData" in Miscellaneous_files.zip
 16 | #
 17 | # Notes before execution:
 18 | # 1. Make sure all required R packages are installed.
 19 | # 2. Set your working directory to the location of this R script.
 20 | #
 21 | # --------------------------- #
 22 | 
 23 | # Load packages
 24 | library(adegenet)
 25 | library(poppr)
 26 | library(LEA)
 27 | library(reshape2)
 28 | library(dplyr)
 29 | library(ggplot2)
 30 | library(rworldmap)
 31 | library(rworldxtra)
 32 | library(ggsn)
 33 | library(sf)
 34 | library(raster)
 35 | library(rgeos)
 36 | library(maps)
 37 | library(maptools)
 38 | library(grid)
 39 | library(miscTools)
 40 | library(stringr)
 41 | library(ggpubr)
 42 | 
 43 | # Import genotypes
 44 | load("lobster_1278ind_79snps_40pop.RData")
 45 | 
 46 | # Explore data
 47 | data_filt
 48 | nLoc(data_filt) # number of loci
 49 | nPop(data_filt) # number of sites
 50 | nInd(data_filt) # number of individuals
 51 | summary(data_filt$pop) # sample size
 52 | 
 53 | # Subset data to reduce computation time
 54 | subsites = sort(c("Vig","Ios","Mul","Cro","She","Idr17","Hel","Flo","Lys","Ber"))
 55 | data_filt = popsub(data_filt, sublist = subsites)
 56 | data_filt
 57 | 
 58 | 
 59 | # ----------------- #
 60 | #
 61 | # Admixture analysis using SNMF from LEA
 62 | #
 63 | # ----------------- #
 64 | 
 65 | # Export genotypes in STRUCTURE format
 66 | source("TJ_genind2structure_function.R")
 67 | genind2structure(data_filt, file = "genotypes", pops = FALSE, markers = FALSE)
 68 | 
 69 | # Convert STRUCTURE file to .geno format
 70 | struct2geno("genotypes", ploidy = 2, FORMAT = 2, extra.column = 1)
 71 | 
 72 | # Run snmf algorithm
 73 | set.seed(123)
 74 | snmf1 = snmf("genotypes.geno",
 75 |              K = 1:10, # number of K ancestral populations to run
 76 |              repetitions = 10, # ten repetitions for each K
 77 |              entropy = TRUE, # calculate cross-entropy
 78 |              project = "new")
 79 | 
 80 | # Load snmf project
 81 | snmf1 = load.snmfProject("genotypes.snmfProject")
 82 | 
 83 | # Plot cross-entropy results to assess optimal number of K
 84 | # Smaller values of cross-entropy usually mean better runs
 85 | # A plateau usually represents the K that best fits the data
 86 | plot(snmf1, col = "blue", cex = 1.5, pch = 19)
 87 | 
 88 | # Extract the cross-entropy of all runs where K = 2
 89 | ce = cross.entropy(snmf1, K = 2)
 90 | ce
 91 | 
 92 | # Find the run with the lowest cross-entropy
 93 | lowest.ce = which.min(ce)
 94 | lowest.ce
 95 | 
 96 | # Extract Q-matrix for the best run
 97 | qmatrix = as.data.frame(Q(snmf1, K = 2, run = lowest.ce))
 98 | head(qmatrix)
 99 | 
100 | # Label column names of qmatrix
101 | ncol(qmatrix)
102 | cluster_names = c()
103 | for (i in 1:ncol(qmatrix)){
104 |   cluster_names[i] = paste("Cluster", i)
105 | }
106 | cluster_names
107 | colnames(qmatrix) = cluster_names
108 | head(qmatrix)
109 | 
110 | # Add individual IDs
111 | qmatrix$Ind = indNames(data_filt)
112 | 
113 | # Add site IDs
114 | qmatrix$Site = data_filt$pop
115 | head(qmatrix)
116 | 
117 | # Convert dataframe to long format
118 | qlong = melt(qmatrix, id.vars=c("Ind","Site"))
119 | head(qlong)
120 | 
121 | # Change order of sites by using the factor function
122 | # site.order = c("Vig","Ios","Cor","Mul","She","Cro","Hel","Flo","Lys","Ber")
123 | # qlong$Site_ord = factor(qlong$Site, levels = site.order)
124 | 
125 | # Adjust facet labels
126 | levels(qlong$Site)
127 | facet.labs = c("Bergen","Cromer","Flodevigen","Helgoland","Île de Ré",
128 |                "Isles of Scilly","Lysekil","Mullet Peninsula","Shetland","Vigo")
129 | levels(qlong$Site) = facet.labs
130 | levels(qlong$Site)
131 | 
132 | # Define colour palette
133 | pal = colorRampPalette(c("green","blue"))
134 | cols = pal(length(unique(qlong$variable)))
135 | 
136 | # Plot admixture barplot 
137 | admix.bar = ggplot(data=qlong, aes(x=Ind, y=value, fill=variable))+
138 |   geom_bar(stat = "identity")+
139 |   scale_y_continuous(expand = c(0,0))+
140 |   facet_wrap(~Site, scales = "free", ncol = 2)+
141 |   scale_fill_manual(values = cols)+
142 |   ylab("Admixture proportion")+
143 |   # xlab("Individual")+
144 |   theme(axis.text.x = element_blank(),
145 |         axis.ticks.x = element_blank(),
146 |         axis.title.x = element_blank(),
147 |         strip.text = element_text(colour="black", size=12),
148 |         panel.grid = element_blank(),
149 |         panel.background = element_blank(),
150 |         legend.position = "top",
151 |         legend.title = element_blank(),
152 |         legend.text = element_text(size = 12))
153 | admix.bar
154 | ggsave("1.admixture_barplot.png", width=6, height=10, dpi=300)
155 | 
156 | 
157 | # ----------------- #
158 | #
159 | # Prepare pie charts
160 | #
161 | # ----------------- #
162 | 
163 | # Calculate mean admixture proportions for each site
164 | head(qmatrix)
165 | clusters = grep("Cluster", names(qmatrix)) # indexes of cluster columns
166 | avg_admix = aggregate(qmatrix[, clusters], list(qmatrix$Site), mean)
167 | 
168 | # Order alphabetically by site
169 | avg_admix = avg_admix[order(as.character(avg_admix$Group.1)), ]
170 | avg_admix
171 | 
172 | # Convert dataframe from wide to long format
173 | avg_admix = melt(avg_admix, id.vars = "Group.1")
174 | head(avg_admix)
175 | 
# Define a function to plot pie charts using ggplot for each site
pie_charts = function(admix_df, site, cols){
  # admix_df = dataframe in long format of admixture proportions per site,
  #            with columns Group.1 (site), variable (cluster) and value
  # site = string, the site to plot (matched against admix_df$Group.1)
  # cols = vector of colours of length(clusters)
  # Returns a ggplot object (pie chart without legend or axes)
  
  # Select rows with explicit indexing: subset() evaluates its condition
  # inside admix_df, so a column called "site" would shadow the argument
  site_rows = admix_df[which(admix_df$Group.1 == site), , drop = FALSE]
  if (nrow(site_rows) == 0){
    stop("No admixture proportions found for site '", site, "'.", call. = FALSE)
  }
  
  ggplot(data = site_rows,
         aes(x = "", y = value, fill = variable))+
    geom_bar(width = 1, stat = "identity", colour = "black", show.legend = FALSE)+
    coord_polar(theta = "y")+
    scale_fill_manual(values = cols)+
    theme_void()
}
188 | 
189 | # Test function on one site
pie_charts(avg_admix, site = "Ber", cols = cols)

# Apply function to all sites (list named by site code, in subsites order)
pies = lapply(subsites, function(site){
  pie_charts(admix_df = avg_admix, site = site, cols = cols)
})
names(pies) = subsites


# ----------------- #
#
# Prepare basemap
#
# ----------------- #

# Import csv file containing coordinates
coords = read.csv("coordinates.csv")

# Subset coordinates
coords = coords[coords$Code %in% subsites, ]

# Order alphabetically by site
coords = coords[order(coords$Code), ] 
coords

# Check order matches coords order
# avg_admix is in long format (one row per site and cluster), so compare the
# unique site codes instead of relying on vector recycling
unique(as.character(avg_admix$Group.1)) == as.character(coords$Code)

# Set map boundary (xmin, xmax, ymin, ymax)
boundary = extent(-15, 15, 40, 64)
boundary

# Get map outlines from rworldmap package
map.outline = getMap(resolution = "high")

# Crop to boundary and convert to dataframe
map.outline = crop(map.outline, y = boundary) %>% fortify()

# Plot basemap
basemap = ggplot()+
  geom_polygon(data=map.outline, aes(x=long, y=lat, group=group), fill="grey",
               colour="black", size=0.5)+
  coord_quickmap(expand=FALSE)+
  ggsn::north(map.outline, symbol = 10, scale = 0.06, location = "topleft")+
  ggsn::scalebar(data = map.outline, dist = 200, dist_unit = "km", height = 0.01,
                 transform = TRUE, model = "WGS84", 
                 location = "bottomleft", anchor = c(x = -12.5, y = 45),
                 st.bottom = FALSE, st.size = 4, st.dist = 0.015)+
  xlab("Longitude")+
  ylab("Latitude")+
  theme(
    axis.text = element_text(colour="black", size=12),
    axis.title = element_text(colour="black", size=14),
    panel.background = element_rect(fill="lightsteelblue2"),
    panel.border = element_rect(fill = NA, colour = "black", size = 0.5),
    panel.grid.minor = element_line(colour="grey90", size=0.5),
    panel.grid.major = element_line(colour="grey90", size=0.5),
    legend.text = element_text(size=12),
    legend.title = element_blank(),
    legend.key.size = unit(0.7, "cm"),
    legend.position = "top"
  )
basemap


# ----------------- #
#
# Add pie charts to basemap
#
# ----------------- #

# Extract coordinates (longitude, latitude) for each site
coord.list = list()
for (i in subsites){
  site.row = coords[coords$Code == i, ]
  coord.list[[i]] = c(site.row$Lon, site.row$Lat)
}
coord.list

# Define pie chart sizes (half-width of each pie, in degrees)
radius = 1

# Convert ggplot pie charts to annotation_custom layers
# pies and coord.list are both built from subsites, so position i refers to
# the same site in each list
pies.ac = vector("list", length(subsites))
for (i in seq_along(subsites)){
  centre = coord.list[[i]]
  pies.ac[[i]] = annotation_custom(grob = ggplotGrob(pies[[i]]),
                                   xmin = centre[[1]] - radius,
                                   xmax = centre[[1]] + radius,
                                   ymin = centre[[2]] - radius,
                                   ymax = centre[[2]] + radius)
}

# Add layers to basemap
pie.map = basemap + pies.ac
pie.map
# Pass the plot explicitly so the right figure is saved even when the script
# is run through source(), where plots are not auto-printed
ggsave("2.pie_charts_map.png", plot = pie.map, width = 8, height = 10, dpi = 300)

# Combine ggplots
admix.bar.map = ggarrange(
  admix.bar + theme(legend.position = "right") +
    labs(title = "Individual admixture proportions", tag = "A"),
  pie.map + labs(title = "Mean admixture proportions for each site", tag = "B")
)
admix.bar.map
ggsave("3.Admixture_bar_map.png", plot = admix.bar.map, width = 15, height = 10, dpi = 300)
290 | 


--------------------------------------------------------------------------------