├── NGS.pdf
├── CGA_MuseumGenomics_Singhal.pdf
├── Massoko_smartpca.par
├── convert.par
├── summarize_ld.py
├── vcf_to_phy.py
├── vcf_to_geno.py
└── pop_gen_tutorial.rst


/NGS.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/singhal/popgen_tutorial/HEAD/NGS.pdf


--------------------------------------------------------------------------------
/CGA_MuseumGenomics_Singhal.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/singhal/popgen_tutorial/HEAD/CGA_MuseumGenomics_Singhal.pdf


--------------------------------------------------------------------------------
/Massoko_smartpca.par:
--------------------------------------------------------------------------------
1 | genotypename:	Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.geno
2 | snpname: Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.snp
3 | indivname: Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.ind
4 | evecoutname: Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.evec
5 | evaloutname: Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.eval
6 | 


--------------------------------------------------------------------------------
/convert.par:
--------------------------------------------------------------------------------
1 | genotypename:	Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.geno
2 | snpname:	Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.snp
3 | indivname:	Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.ind
4 | outputformat:    PED
5 | genotypeoutname: Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.ped
6 | snpoutname: Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.map
7 | indivoutname: Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.pedind 
8 | 


--------------------------------------------------------------------------------
/summarize_ld.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import gzip
 3 | import os
 4 | import argparse
 5 | 
 6 | parser = argparse.ArgumentParser(
 7 |                 description="Summarize LD file.",
 8 |                 formatter_class=argparse.ArgumentDefaultsHelpFormatter
 9 |                 )
10 | 
11 | parser.add_argument('--infile', type=str, default=None,
12 |                         help='VCFtools LD file to convert')
13 | 
14 | parser.add_argument('--win', type=int, default = 1,
15 |                         help='The window across which to summarize LD patterns')
16 | 
17 | args = parser.parse_args()
18 | 
19 | f = open(args.infile, 'r')
20 | out = args.infile + '_summary_window%s' % args.win
21 | o = open(out, 'w')
22 | 
23 | r2 = {}
24 | header = f.next()
25 | for l in f:
26 | 	d = re.split('\s+', l.rstrip())
27 | 	dist = int(d[2]) - int(d[1])
28 | 	dist = int(dist / float(args.win)) * args.win
29 | 
30 | 	if dist not in r2:
31 | 		r2[dist] = {'val': 0, 'num': 0}
32 | 	r2[dist]['val'] += float(d[4])
33 | 	r2[dist]['num'] += 1
34 | f.close()
35 | 
36 | o.write('distance\tnum_comparisons\tr2\n')
37 | for dist in sorted(r2.keys()):
38 | 	avg = r2[dist]['val'] / float(r2[dist]['num'])
39 | 	o.write('%s\t%s\t%.4f\n' % (dist, r2[dist]['num'], avg))
40 | o.close()
41 | 	
42 | 


--------------------------------------------------------------------------------
/vcf_to_phy.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import gzip
 3 | import os
 4 | import argparse
 5 | 
 6 | parser = argparse.ArgumentParser(
 7 | 		description="Turn VCF into phy.",
 8 | 		formatter_class=argparse.ArgumentDefaultsHelpFormatter
 9 | 		)
10 | 
11 | parser.add_argument('--infile', type=str, default=None,
12 | 			help='Gzipped infile to convert')
13 | 
14 | parser.add_argument('--thin', type=int, default = 1,
15 | 			help='Further thinning')
16 | 
17 | args = parser.parse_args()
18 | 
19 | # set up the input and output files
20 | infile = args.infile
21 | f = gzip.open(infile, 'r')
22 | out_fa = re.sub('.vcf.gz', '_thin%s.phy' % args.thin, infile)
23 | o = open(out_fa, 'w')
24 | 
25 | seq = {}
26 | 
27 | for ix, l in enumerate(f):
28 | 	l = l.decode('ascii')
29 | 	if re.search('CHROM', l):
30 | 		d = re.split('\t', l.rstrip())
31 | 		inds = d[9:]
32 | 		for ind in inds:
33 | 			seq[ind] = ''
34 | 	elif not re.search('^#', l):
35 | 		# thin it further!
36 | 		if ix % args.thin == 0:
37 | 			d = re.split('\t', l.rstrip())
38 | 	
39 | 			snps = {}
40 | 			alleles = [d[3]] + re.split(',', d[4])
41 | 			for ix, a in enumerate(alleles):
42 | 				snps[str(ix)] = a
43 | 			snps['.'] = 'N'
44 | 
45 | 			genos = [re.search('^(\S)', x).group(1) for x in d[9:]]
46 | 		
47 | 			for ind, geno in zip(inds, genos):
48 | 				seq[ind] += snps[geno]
49 | f.close()
50 | 
51 | o.write('%s %s\n' % (len(seq), len(seq[list(seq.keys())[0]])))
52 | for ind, s in seq.items():
53 | 	o.write('%s %s\n' % (ind, s))
54 | o.close()
55 | 


--------------------------------------------------------------------------------
/vcf_to_geno.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import gzip
 3 | import os
 4 | 
 5 | dir = '/home/ubuntu/'
 6 | 
 7 | in_vcf = os.path.join(dir, 'Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.vcf.gz')
 8 | f = gzip.open(in_vcf, 'r')
 9 | 
10 | out_g = os.path.join(dir, 'Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.geno')
11 | out_s = os.path.join(dir, 'Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.snp')
12 | out_i = os.path.join(dir, 'Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.ind')
13 | o1 = open(out_g, 'w')
14 | # total hack the snp data because human data
15 | o2 = open(out_s, 'w')
16 | o3 = open(out_i, 'w')
17 | 
18 | alleles = {	'0|0': '0', '0|1': '1', '1|1': '2', 
19 | 		'0/0': '0', '0/1': '1', '1/1': '2', '1|0': '1'}
20 | 
21 | gen_length = 850000000
22 | max_len = 0
23 | cur_len = 0
24 | cur_chr = 'NA'
25 | 
26 | snp = 1
27 | snps = {}
28 | for l in f:
29 | 	l = l.decode('ascii')
30 | 	if re.search('CHROM', l):
31 | 		d = re.split('\t', l.rstrip())
32 | 		inds = d[9:]
33 | 		for ind in inds:
34 | 			group = re.sub('_[^_]+$', '', ind)
35 | 			o3.write('%s\tU\t%s\n' % (ind, group))
36 | 		o3.close()
37 | 	elif not re.search('^#', l):
38 | 		d = re.split('\t', l.rstrip())
39 | 	
40 | 		if d[0] != cur_chr:
41 | 			cur_chr = d[0]	
42 | 			cur_len += max_len
43 | 	
44 | 		max_len = int(d[1])
45 | 		cur_chr = d[0]
46 | 
47 | 		genos = [re.search('^(\S\S\S)', x).group(1) for x in d[9:]]
48 | 		if len(set(genos)) > 1:
49 | 			genos = [alleles[x] if x in alleles else '9' for x in genos] 		
50 | 			o1.write('%s\n' % ''.join(genos))
51 | 			o2.write('rs%s\t1\t%.3f\t%s\n' % 
52 | 				(snp, (int(d[1]) + cur_len) / float(gen_length), int(d[1]) + cur_len))
53 | 			snp += 1
54 | f.close()
55 | o1.close()
56 | 


--------------------------------------------------------------------------------
/pop_gen_tutorial.rst:
--------------------------------------------------------------------------------
  1 | ============================
  2 | Population Genetics Tutorial
  3 | ============================
  4 | 
  5 | ========= 
  6 | Exercise
  7 | ========= 
  8 | Before you guys got here
  9 | ~~~~~~~~~~~~~~~~~~~~~~~~
 10 | 
 11 | Started with data from: "Genomic islands of speciation separate cichlid ecomorphs in an East African crater lake", Malinsky et al 2015. 
 12 | 
 13 | Downloaded VCF from http://datadryad.org/resource/doi:10.5061/dryad.770mc
 14 | 	- http://datadryad.org/bitstream/handle/10255/dryad.101389/Massoko_Dryad_VCF_final.vcf.gz
 15 | 	- These data had been filtered for quality
 16 | 	- And only variable sites had been retained
 17 | 	- And phased using the program `BEAGLE`, which relies on linkage disequilibrium to phase haplotypes
 18 | 
 19 | Made the VCF smaller so we could analyze it in this lifetime: 36 individuals and no indels.::
 20 | 
 21 | 	vcftools --gzvcf Massoko_Dryad_VCF_final.vcf.gz --keep inds_to_keep.txt --stdout --recode --recode-INFO-all --remove-indels | gzip -c > Massoko_Dryad_VCF_final_subset_noIndels.vcf.gz
 22 | 
  23 | Made the VCF smaller still by removing low-frequency sites and then thinning the remaining sites to reduce local linkage disequilibrium. We will use these files for many of our analyses.::
 24 | 
 25 | 	vcftools --gzvcf Massoko_Dryad_VCF_final_subset_noIndels.vcf.gz --maf 0.05 --max-maf 0.95 --stdout --recode --recode-INFO-all | gzip -c > Massoko_Dryad_VCF_final_subset_noIndels_maf05.vcf.gz
 26 | 	vcftools --gzvcf Massoko_Dryad_VCF_final_subset_noIndels_maf05.vcf.gz --thin 1000 --stdout --recode --recode-INFO-all | gzip -c > Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.vcf.gz
 27 | 
 28 | Used the thinned VCF to make input files for phylogenetic inference and population structure analyses.::
 29 | 
 30 | 	python vcf_to_phy.py --infile Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.vcf.gz --thin 5
 31 | 	python vcf_to_geno.py
 32 | 	EIG-6.1.3/bin/convertf -p convert.par
 33 | 	cat Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.map | awk -F'\\\s+' '{print $1,$2, $3,$4}' > map
 34 | 	mv map Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.map
 35 | 	plink --file Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K --recode
 36 | 	mv plink.ped Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.ped
 37 | 	mv plink.map Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.map
 38 | 
 39 | 
 40 | Now you start
 41 | ~~~~~~~~~~~~~
 42 | 
  43 | Install the following useful software.::
 44 | 
 45 | 	R
 46 | 	raxml
 47 | 	vcftools
 48 | 	eigensoft
 49 | 	admixture
 50 | 
 51 | You can download the smaller data set and ancillary files from here.::
 52 | 
 53 | 	wget https://www.dropbox.com/s/ra4yqix0jfe1fgn/tutorial_files.tar.gz
 54 | 	tar -xzvf tutorial_files.tar.gz
 55 | 	cd tutorial_files
 56 | 
 57 | Calculate nucleotide diversity (pi). Use `VCFtools` to figure out how to calculate it. We want to calculate it for 'benthic' and 'littoral' morphs separately.::
 58 | 
 59 | 	vcftools --gzvcf Massoko_Dryad_VCF_final_subset_noIndels.vcf.gz --keep littoral.txt --window-pi 100000 --out littoral_pi
 60 | 	vcftools --gzvcf Massoko_Dryad_VCF_final_subset_noIndels.vcf.gz --keep benthic.txt --window-pi 100000 --out benthic_pi
 61 | 
 62 | Calculate linkage disequilibrium.::
 63 | 
 64 | 	vcftools --gzvcf Massoko_Dryad_VCF_final_subset_noIndels.vcf.gz --keep littoral.txt --ld-window-bp 500000 --chr scaffold_0 --hap-r2 --out littoral_scaffold_0_ld --min-r2 0.001
 65 | 	vcftools --gzvcf Massoko_Dryad_VCF_final_subset_noIndels.vcf.gz --keep benthic.txt --ld-window-bp 500000 --chr scaffold_0 --hap-r2 --out benthic_scaffold_0_ld --min-r2 0.001
 66 | 
 67 | Summarize linkage disequilibrium data files so that they are smaller and easier to plot.::
 68 | 
 69 | 	python summarize_ld.py --infile littoral_scaffold_0_ld.hap.ld --win 10
 70 | 	python summarize_ld.py --infile benthic_scaffold_0_ld.hap.ld --win 10
 71 | 
  72 | Calculate Fst between the benthic and littoral morphs (the output prefix says "limnetic", but the comparison is benthic vs. littoral).::
 73 | 
 74 | 	vcftools --gzvcf Massoko_Dryad_VCF_final_subset_noIndels.vcf.gz --weir-fst-pop littoral.txt --weir-fst-pop benthic.txt --fst-window-size 100000 --out benthic_limnetic_fst
 75 | 
 76 | Make a phylogeny.::
 77 | 
 78 | 	raxmlHPC -m GTRGAMMA -n Massoko -s Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K_thin5.phy -p 123 -o A_calliptera_Chitimba,A_calliptera_Bua,A_calliptera_Chizumulu
 79 | 
 80 | Run `ADMIXTURE` for up to 6 populations.::
 81 | 
 82 | 	admixture Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.ped 1
 83 | 	admixture Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.ped 2
 84 | 	...
 85 | 
 86 | Run `smartpca`.::
 87 | 
 88 | 	smartpca -p Massoko_smartpca.par > Massoko_smartpca.out
 89 | 
 90 | Now that we have all the different pieces, let's start to plot the data and see what we find. Put all the results into one folder and download them locally so that we can plot and visualize them using `R`.
 91 | 
 92 | Just to be sure, here are all the files you should have. Should things be taking too long, you can borrow my results that I generated earlier: https://www.dropbox.com/s/czrru76ku2kqwt2/results.tar.gz?dl=0 
 93 | 
 94 | - benthic_limnetic_fst.windowed.weir.fst
 95 | - benthic_pi.windowed.pi
 96 | - benthic_scaffold_0_ld.hap.ld_summary_window10
 97 | - littoral_pi.windowed.pi
 98 | - littoral_scaffold_0_ld.hap.ld_summary_window10
 99 | - Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.1.P
100 | - Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.1.Q
101 | - Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.2.P
102 | - Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.2.Q
103 | - Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.3.P
104 | - Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.3.Q
105 | - Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.4.P
106 | - Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.4.Q
107 | - Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.5.P
108 | - Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.5.Q
109 | - Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.6.P
110 | - Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.6.Q
111 | - Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.eval
112 | - Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.evec
113 | - RAxML_bestTree.Massoko
114 | 
115 | We have the following data types.
116 | 
117 | #. Genetic diversity.
118 | #. Genetic differentiation. (Fst)
119 | #. Decay of linkage disequilibrium.
120 | #. A tree.
121 | #. PCA results.
122 | #. `ADMIXTURE` population clustering results.
123 | 
124 | We will be using `R` to plot all these results. I will get you started on how to start thinking about some of these. I would recommend setting your working directory to be the directory that has all your results. For example,::
125 | 
126 | 	setwd("/Users/sonal/Desktop/results/")
127 | 
128 | Note that this is generally considered bad programming practice for scripts that will be publicly shared, but it is convenient when doing exploratory data analysis.
129 | 
130 | Genetic diversity
131 | ~~~~~~~~~~~~~~~~~
132 | To load the genetic diversity results,::
133 | 
134 | 	b = read.table("benthic_pi.windowed.pi", header=T)
135 | 	l = read.table("littoral_pi.windowed.pi", header=T)
136 | 
137 | Look at how the data is structured and summarize it quickly,::
138 | 
139 | 	head(b)
140 | 	summary(b)
141 | 
142 | To answer some of the questions below, it might be useful to combine across both data-frames::
143 | 
144 | 	x = merge(b, l, by=c("CHROM", "BIN_START", "BIN_END"))
145 | 
146 | To answer some of the questions below, it might be useful to combine across both data-frames in another way::
147 | 
148 | 	all = data.frame(c(b$PI, l$PI), c(rep("benthic", nrow(b)), c(rep("littoral", nrow(l)))))
149 | 	names(all) = c("PI", "MORPH")
150 | 
151 | You might want to also explore the following functions to answer the questions::
152 | 
153 | 	cor.test()
154 | 	boxplot()
155 | 	aov() 
156 |         # if you store the results of aov() in a variable and then run summary() on the variable, you get more info
157 | 
158 | 
159 | Some questions:
160 | 
 161 | #. What are the min, max, and mean levels of genetic diversity in each morph?
162 | #. Is genetic diversity between the two morphs significantly different?
163 | #. Why might genetic diversity be higher in one morph than another? How could you test this?
164 | #. How correlated is genetic diversity between the two morphs?
165 | #. Why would genetic diversity be correlated between the two morphs?
166 | 
167 | Genetic differentiation
168 | ~~~~~~~~~~~~~~~~~~~~~~~
169 | To load the genetic differentiation results,::
170 | 
171 | 	fst = read.table("benthic_limnetic_fst.windowed.weir.fst", header=T)
172 | 
173 | To select rows that have certain values,::
174 | 
175 | 	x = fst[fst$CHROM == 'scaffold_0', ]
176 | 	x = fst[fst$WEIGHTED_FST >= 0.1, ]
177 | 
178 | You might want to explore the functions::
179 | 
180 | 	dim()
181 | 	nrow()
182 | 
183 | Which allow you to quickly figure out how big these dataframes are.
184 | 
185 | Some questions:
186 | 
187 | #. What is the mean Fst between these two morphs?
188 | #. Is there a correlation between the number of variants in a window and Fst? If so, it would suggest we need to be cautious of these results.
189 | #. In this paper, the authors emphasize the importance of genomic regions that are highly differentiated. How many 100 kb windows are differentiated above Fst >0.1? Fst >0.2? Fst >0.3?
190 | #. Plot Fst along BIN_START on scaffold_15. The authors originally identified 10 peaks (see Fig. 3D) that are highly differentiated. How many do you identify? Why might our results be different?
191 | #. How might you determine if windows with high Fst are significant?
192 | 
193 | Decay of Linkage Disequilibrium
194 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
195 | You can read in the tables for linkage disequilibrium just like you did for nucleotide diversity.
196 | 
 197 | Having done that, we can now plot the data. Plot distance on the x-axis and :math:`r^2` on the y-axis (a measure of linkage disequilibrium that looks at the correlation coefficient between pairs of loci -- higher values mean that two loci "travel" together more than you would expect under random assortment).
198 | 
199 | Try plotting both morphs at once. You will want to use the `points()` function.
200 | 
201 | Some questions:
202 | 
203 | #. Do the two morphs have different decay patterns? 
 204 | #. A key aspect of linkage disequilibrium is how quickly it decays. At what physical distance is the level of linkage disequilibrium halved? You can estimate this visually or using R.
205 | #. These points are very very noisy. How might you do this exercise again to reduce some of this noise? If you have time, try it!
206 | 
207 | Plot the phylogeny
208 | ~~~~~~~~~~~~~~~~~~
209 | To plot the phylogeny, you will need to install the library ape.::
210 | 
211 | 	install.packages("ape")
212 | 	library(ape)
213 | 
 214 | Then, you can read in and plot the tree.::
215 | 
216 | 	t = read.tree("RAxML_bestTree.Massoko")
217 | 	# makes the tree easier to visualize by ladderizing it
218 | 	t = ladderize(t)
219 | 	plot(t)
220 | 
221 | Some questions:
222 | 
223 | #. What do you think is going on with the "small" morph?
224 | #. Looking at this tree, would you say that the "littoral" and "benthic" morphs are differentiated? Why or why not?
225 | #. Before we use this tree for any formal analysis, what else might you want to check about the tree?
226 | 
227 | Plot the PCA
228 | ~~~~~~~~~~~~
229 | To read in the PCA data::
230 | 
231 | 	d = read.table("Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.evec")
232 | 
 233 | Note that the eval file contains the eigenvalues, which we would need to calculate the proportion of variance explained by each PCA axis.
234 | 
235 | Look at the data file using `head()` -- how is it structured? What does each column mean? 
236 | 
237 | You can plot it by::
238 | 
239 | 	plot(d$V2, d$V3, col=as.factor(d$V12), pch=16)
240 | 
241 | This isn't such an informative plot. Why? How would you subset the data to make it more informative? Hint: look at column V12.::
242 | 
243 | 	s = d[d$V12 %in% c("Massoko_benthic", "Massoko_littoral", "Massoko_small"),]
244 | 
245 | This still isn't as informative as it could be. It likely would have been much more informative if we removed the outgroups before doing the PCA. That said, are these morphs differentiated? How do these results compare to what we saw with the phylogeny? Why might these results be different?
246 | 
247 | ADMIXTURE results
248 | ~~~~~~~~~~~~~~~~~
249 | To read in the `ADMIXTURE` results::
250 | 
251 | 	d1 = read.table("Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.1.Q")
252 | 	d2 = read.table("Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.2.Q")
253 | 	d3 = read.table("Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.3.Q")
254 | 	d4 = read.table("Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.4.Q")
255 | 	d5 = read.table("Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.5.Q")
256 | 	d6 = read.table("Massoko_Dryad_VCF_final_subset_noIndels_maf05_thinned1K.6.Q")
257 | 
258 | To plot the results::
259 | 
260 | 	par(mfrow=c(6,1), mar=c(1,4,1,1))
261 | 	barplot(t(as.matrix(d1)), col=rainbow(1), border=NA)
262 | 	barplot(t(as.matrix(d2)), col=rainbow(2), border=NA)
263 | 	barplot(t(as.matrix(d3)), col=rainbow(3), border=NA)
264 | 	barplot(t(as.matrix(d4)), col=rainbow(4), border=NA)
265 | 	barplot(t(as.matrix(d5)), col=rainbow(5), border=NA)
266 | 	par(mar=c(3,4,1,1))
267 | 	x = barplot(t(as.matrix(d6)), col=rainbow(6), border=NA)
268 | 	inds = c(rep('A_cal', 3), rep('Ita', 3), rep('B', 10), rep('L', 10), rep('S', 10))
269 | 	mtext(inds, 1, at=x, las=2)
270 | 
271 | What's going on here? Based on all the results you have seen from the phylogeny, the PCA, and this, how would you characterize the differentiation between these morphs?
272 | 
273 | ========= 
274 | Resources
275 | ========= 
276 | Population Genetics Books
277 | ~~~~~~~~~~~~~~~~~~~~~~~~~
278 | - Coop's Class Notes: http://cooplab.github.io/popgen-notes/
279 | - Felsenstein's Book: http://evolution.genetics.washington.edu/pgbook/pgbook.html
280 | - Gillespie's *Population Genetics: A Concise Guide*
281 | - Hartl and Clark's *Principles of Population Genetics*
282 | - Nielsen and Slatkin's *An Introduction to Population Genetics*
283 | - Wakeley's *Coalescent Theory*
284 | - Yang's *Computational Molecular Evolution*
285 | 
286 | Great set of tutorials
287 | ~~~~~~~~~~~~~~~~~~~~~~
288 | - http://evomics.org/learning/population-and-speciation-genomics/
289 | - http://grunwaldlab.github.io/Population_Genetics_in_R/Preface.html
290 | 
291 | Papers on population genomics
292 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
293 | - *A framework for variation discovery and genotyping using next-generation DNA sequencing data*, DePristo et al 2010; 10.1038/ng.806
294 | - *Genome sequencing and population genomics in non-model organisms*, Ellegren 2014; 10.1016/j.tree.2013.09.008
295 | - *Genotype and SNP calling from next-generation sequencing data*, Nielsen et al 2011; 10.1038/nrg2986
296 | - *Methods and models for unravelling human evolutionary history*, Schraiber and Akey 2015; 10.1038/nrg4005
297 | - *Population Genomics: Whole-Genome Analysis of Polymorphism and Divergence in Drosophila simulans*, Begun et al 2007; 10.1371/journal.pbio.0050310
298 | - *The power and promise of population genomics: from genotyping to genome typing*, Luikart et al 2003; 10.1038/nrg1226
299 | 
300 | Software & Programs for working with data
301 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
302 | - http://pngu.mgh.harvard.edu/~purcell/plink/index.shtml; great for quality filtering and simple parsing of variants 
303 | - https://github.com/thibautjombart/adegenet/wiki; R package that can parse variant data
304 | - https://vcftools.github.io/index.html; can generate many useful statistics from VCF files
305 | - https://cran.r-project.org/web/packages/PopGenome/index.html; R package that calculates statistics from VCFs, note not very transparent in how it handles missing data
306 | - http://vcf.iobio.io/; allows quick visualization of VCFs
307 | - http://popgen.dk/wiki/index.php/ANGSD; ideal for low coverage data
308 | 
309 | Learn Python
310 | ~~~~~~~~~~~~
311 | - https://github.com/singhal/python_workshop/blob/master/Python.Md
312 | - http://learnpythonthehardway.org/
313 | - https://www.coursera.org/course/pythonlearn
314 | - http://rosalind.info/problems/locations/
315 | 
316 | Learn R
317 | ~~~~~~~
318 | - http://tryr.codeschool.com/
319 | - https://www.coursera.org/learn/r-programming
320 | - https://www.edx.org/course/introduction-r-data-science-microsoft-dat204x-1
321 | - http://swirlstats.com/students.html
322 | - http://r4ds.had.co.nz/
323 | 
324 | Learn Shell / Unix
325 | ~~~~~~~~~~~~~~~~~~
326 | - https://www.codecademy.com/learn/learn-the-command-line
327 | - http://korflab.ucdavis.edu/unix_and_Perl/
328 | - http://www.learnshell.org/
329 | 
330 | Learn Perl
331 | ~~~~~~~~~~
332 | - http://korflab.ucdavis.edu/unix_and_Perl/
333 | 


--------------------------------------------------------------------------------