├── img
    ├── MA1_affinities.png
    ├── f3_phylogeny.png
    ├── f4_phylogeny.png
    ├── Patterson_2012_ANEfig.png
    ├── Patterson_2012_table.png
    └── outgroupf3_phylogeny.png
├── .gitignore
├── f4_param.txt
├── adm_f3_popfile.txt
├── adm_f3_param.txt
├── outgroup_f3_param_Han.txt
├── outgroup_f3_param_MA1.txt
├── f4_popfile.txt
├── pca.AllEurasia.params.txt
├── pca.WestEurasia.params.txt
├── supp
    ├── tasks.sh
    ├── convertf_param.txt
    ├── WestEurasia.poplist.txt
    ├── AllEurasia.poplist.txt
    └── poplist.txt
├── outgroup_f3_popfile_Han.txt
├── outgroup_f3_popfile_MA1.txt
├── README.md
├── population_frequencies.txt
├── f3_outgroup_stats_Han.txt
├── f3_outgroup_stats_MA1.txt
├── 04_Rmd_plotting_pca.Rmd
├── 03_Rmd_smartpca.Rmd
├── pca.WestEurasia.eval
├── 01_bashnb_getting_started.ipynb
├── 03_bashnb_smartpca.ipynb
├── 05_Rmd_fstatistics.Rmd
└── pca.AllEurasia.eval


/img/MA1_affinities.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nevrome/popgen_course/master/img/MA1_affinities.png


--------------------------------------------------------------------------------
/img/f3_phylogeny.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nevrome/popgen_course/master/img/f3_phylogeny.png


--------------------------------------------------------------------------------
/img/f4_phylogeny.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nevrome/popgen_course/master/img/f4_phylogeny.png


--------------------------------------------------------------------------------
/img/Patterson_2012_ANEfig.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nevrome/popgen_course/master/img/Patterson_2012_ANEfig.png


--------------------------------------------------------------------------------
/img/Patterson_2012_table.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nevrome/popgen_course/master/img/Patterson_2012_table.png


--------------------------------------------------------------------------------
/img/outgroupf3_phylogeny.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nevrome/popgen_course/master/img/outgroupf3_phylogeny.png


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .ipynb_checkpoints/
 2 | 
 3 | # rendered documents
 4 | *.html
 5 | 
 6 | # data
 7 | data/*
 8 | 
 9 | # R
10 | .Rhistory
11 | 


--------------------------------------------------------------------------------
/f4_param.txt:
--------------------------------------------------------------------------------
1 | genotypename:   /data/popgen_course/HumanOrigins_FennoScandian_small.geno
2 | snpname:        /data/popgen_course/HumanOrigins_FennoScandian_small.snp
3 | indivname:      /data/popgen_course/HumanOrigins_FennoScandian_small.ind
4 | popfilename:    f4_popfile.txt
5 | f4mode: YES


--------------------------------------------------------------------------------
/adm_f3_popfile.txt:
--------------------------------------------------------------------------------
1 | Nganasan French Finnish
2 | Nganasan Icelandic Finnish
3 | Nganasan Lithuanian Finnish
4 | Nganasan Norwegian Finnish
5 | BolshoyOleniOstrov French Finnish
6 | BolshoyOleniOstrov Icelandic Finnish
7 | BolshoyOleniOstrov Lithuanian Finnish
8 | BolshoyOleniOstrov Norwegian Finnish


--------------------------------------------------------------------------------
/adm_f3_param.txt:
--------------------------------------------------------------------------------
1 | genotypename:   /data/popgen_course/HumanOrigins_FennoScandian_small.geno
2 | snpname:        /data/popgen_course/HumanOrigins_FennoScandian_small.snp
3 | indivname:      /data/popgen_course/HumanOrigins_FennoScandian_small.ind
4 | popfilename:    adm_f3_popfile.txt
5 | inbreed: YES


--------------------------------------------------------------------------------
/outgroup_f3_param_Han.txt:
--------------------------------------------------------------------------------
1 | genotypename:   /data/popgen_course/HumanOrigins_FennoScandian_small.geno
2 | snpname:        /data/popgen_course/HumanOrigins_FennoScandian_small.snp
3 | indivname:      /data/popgen_course/HumanOrigins_FennoScandian_small.ind
4 | popfilename:    outgroup_f3_popfile_Han.txt


--------------------------------------------------------------------------------
/outgroup_f3_param_MA1.txt:
--------------------------------------------------------------------------------
1 | genotypename:   /data/popgen_course/HumanOrigins_FennoScandian_small.geno
2 | snpname:        /data/popgen_course/HumanOrigins_FennoScandian_small.snp
3 | indivname:      /data/popgen_course/HumanOrigins_FennoScandian_small.ind
4 | popfilename:    outgroup_f3_popfile_MA1.txt


--------------------------------------------------------------------------------
/f4_popfile.txt:
--------------------------------------------------------------------------------
1 | Mbuti Nganasan French Finnish
2 | Mbuti Nganasan Icelandic Finnish
3 | Mbuti Nganasan Lithuanian Finnish
4 | Mbuti Nganasan Norwegian Finnish
5 | Mbuti BolshoyOleniOstrov French Finnish
6 | Mbuti BolshoyOleniOstrov Icelandic Finnish
7 | Mbuti BolshoyOleniOstrov Lithuanian Finnish
8 | Mbuti BolshoyOleniOstrov Norwegian Finnish


--------------------------------------------------------------------------------
/pca.AllEurasia.params.txt:
--------------------------------------------------------------------------------
 1 | genotypename: /data/popgen_course/genotypes_small.geno
 2 | snpname: /data/popgen_course/genotypes_small.snp
 3 | indivname: /data/popgen_course/genotypes_small.ind
 4 | evecoutname: pca.AllEurasia.evec
 5 | evaloutname: pca.AllEurasia.eval
 6 | poplistname: /data/popgen_course/AllEurasia.poplist.txt
 7 | lsqproject: YES
 8 | numoutevec: 4
 9 | numthreads: 1
10 | 


--------------------------------------------------------------------------------
/pca.WestEurasia.params.txt:
--------------------------------------------------------------------------------
 1 | genotypename: data/popgen_course/genotypes_small.geno
 2 | snpname: data/popgen_course/genotypes_small.snp
 3 | indivname: data/popgen_course/genotypes_small.ind
 4 | evecoutname: pca.WestEurasia.evec
 5 | evaloutname: pca.WestEurasia.eval
 6 | poplistname: data/popgen_course/WestEurasia.poplist.txt
 7 | lsqproject: YES
 8 | numoutevec: 4
 9 | numthreads: 1
10 | 


--------------------------------------------------------------------------------
/supp/tasks.sh:
--------------------------------------------------------------------------------
 1 | OUT=~/Data/workshop_dataset_prep
 2 | mkdir -p $OUT
 3 | 
 4 | #Copy genotyping data from Thiseas:
 5 | D=/projects1/AncientFinnish/DataFreeze20_07_17/results/calls/PublishedOnly.HO.1240K.Ancients+Saami
 6 | scp sdag:$D.ind $OUT/HumanOrigins_FennoScandian.ind
 7 | scp sdag:$D.geno $OUT/HumanOrigins_FennoScandian.geno
 8 | scp sdag:$D.snp $OUT/HumanOrigins_FennoScandian.snp
 9 | 
10 | # Extract smaller dataset
11 | convertf -p convertf_param.txt
12 | 
13 | 


--------------------------------------------------------------------------------
/supp/convertf_param.txt:
--------------------------------------------------------------------------------
1 | genotypename:    /Users/schiffels/Data/workshop_dataset_prep/HumanOrigins_FennoScandian.geno
2 | snpname:         /Users/schiffels/Data/workshop_dataset_prep/HumanOrigins_FennoScandian.snp
3 | indivname:       /Users/schiffels/Data/workshop_dataset_prep/HumanOrigins_FennoScandian.ind
4 | outputformat:    EIGENSTRAT
5 | genotypeoutname: /Users/schiffels/Data/workshop_dataset_prep/HumanOrigins_FennoScandian_small.geno
6 | snpoutname:      /Users/schiffels/Data/workshop_dataset_prep/HumanOrigins_FennoScandian_small.snp
7 | indivoutname:    /Users/schiffels/Data/workshop_dataset_prep/HumanOrigins_FennoScandian_small.ind
8 | poplistname:     poplist.txt
9 | 


--------------------------------------------------------------------------------
/outgroup_f3_popfile_Han.txt:
--------------------------------------------------------------------------------
 1 | Han Chuvash Mbuti
 2 | Han Albanian Mbuti
 3 | Han Armenian Mbuti
 4 | Han Bulgarian Mbuti
 5 | Han Czech Mbuti
 6 | Han Druze Mbuti
 7 | Han English Mbuti
 8 | Han Estonian Mbuti
 9 | Han Finnish Mbuti
10 | Han French Mbuti
11 | Han Georgian Mbuti
12 | Han Greek Mbuti
13 | Han Hungarian Mbuti
14 | Han Icelandic Mbuti
15 | Han Italian_North Mbuti
16 | Han Italian_South Mbuti
17 | Han Lithuanian Mbuti
18 | Han Maltese Mbuti
19 | Han Mordovian Mbuti
20 | Han Norwegian Mbuti
21 | Han Orcadian Mbuti
22 | Han Russian Mbuti
23 | Han Sardinian Mbuti
24 | Han Scottish Mbuti
25 | Han Sicilian Mbuti
26 | Han Spanish_North Mbuti
27 | Han Spanish Mbuti
28 | Han Ukrainian Mbuti
29 | Han Levanluhta Mbuti
30 | Han BolshoyOleniOstrov Mbuti
31 | Han ChalmnyVarre Mbuti
32 | Han Saami.DG Mbuti


--------------------------------------------------------------------------------
/outgroup_f3_popfile_MA1.txt:
--------------------------------------------------------------------------------
 1 | MA1_HG.SG Chuvash Mbuti
 2 | MA1_HG.SG Albanian Mbuti
 3 | MA1_HG.SG Armenian Mbuti
 4 | MA1_HG.SG Bulgarian Mbuti
 5 | MA1_HG.SG Czech Mbuti
 6 | MA1_HG.SG Druze Mbuti
 7 | MA1_HG.SG English Mbuti
 8 | MA1_HG.SG Estonian Mbuti
 9 | MA1_HG.SG Finnish Mbuti
10 | MA1_HG.SG French Mbuti
11 | MA1_HG.SG Georgian Mbuti
12 | MA1_HG.SG Greek Mbuti
13 | MA1_HG.SG Hungarian Mbuti
14 | MA1_HG.SG Icelandic Mbuti
15 | MA1_HG.SG Italian_North Mbuti
16 | MA1_HG.SG Italian_South Mbuti
17 | MA1_HG.SG Lithuanian Mbuti
18 | MA1_HG.SG Maltese Mbuti
19 | MA1_HG.SG Mordovian Mbuti
20 | MA1_HG.SG Norwegian Mbuti
21 | MA1_HG.SG Orcadian Mbuti
22 | MA1_HG.SG Russian Mbuti
23 | MA1_HG.SG Sardinian Mbuti
24 | MA1_HG.SG Scottish Mbuti
25 | MA1_HG.SG Sicilian Mbuti
26 | MA1_HG.SG Spanish_North Mbuti
27 | MA1_HG.SG Spanish Mbuti
28 | MA1_HG.SG Ukrainian Mbuti
29 | MA1_HG.SG Levanluhta Mbuti
30 | MA1_HG.SG BolshoyOleniOstrov Mbuti
31 | MA1_HG.SG ChalmnyVarre Mbuti
32 | MA1_HG.SG Saami.DG Mbuti


--------------------------------------------------------------------------------
/supp/WestEurasia.poplist.txt:
--------------------------------------------------------------------------------
 1 | Chuvash
 2 | Abkhasian
 3 | Adygei
 4 | Albanian
 5 | Armenian
 6 | Assyrian
 7 | Balkar
 8 | Basque
 9 | BedouinA
10 | BedouinB
11 | Belarusian
12 | Bulgarian
13 | Canary_Islander
14 | Chechen
15 | Croatian
16 | Cypriot
17 | Czech
18 | Druze
19 | English
20 | Estonian
21 | Finnish
22 | French
23 | Georgian
24 | German
25 | Greek
26 | Hungarian
27 | Icelandic
28 | Iranian
29 | Irish
30 | Irish_Ulster
31 | Italian_North
32 | Italian_South
33 | Jew_Ashkenazi
34 | Jew_Georgian
35 | Jew_Iranian
36 | Jew_Iraqi
37 | Jew_Libyan
38 | Jew_Moroccan
39 | Jew_Tunisian
40 | Jew_Turkish
41 | Jew_Yemenite
42 | Jordanian
43 | Kumyk
44 | Lebanese_Christian
45 | Lebanese
46 | Lebanese_Muslim
47 | Lezgin
48 | Lithuanian
49 | Maltese
50 | Mordovian
51 | North_Ossetian
52 | Norwegian
53 | Orcadian
54 | Palestinian
55 | Polish
56 | Romanian
57 | Russian
58 | Sardinian
59 | Saudi
60 | Scottish
61 | Shetlandic
62 | Sicilian
63 | Sorb
64 | Spanish_North
65 | Spanish
66 | Syrian
67 | Turkish
68 | Ukrainian
69 | 


--------------------------------------------------------------------------------
/supp/AllEurasia.poplist.txt:
--------------------------------------------------------------------------------
  1 | Abkhasian
  2 | Adygei
  3 | Albanian
  4 | Aleut
  5 | Aleut_Tlingit
  6 | Altaian
  7 | Ami
  8 | Armenian
  9 | Assyrian
 10 | Atayal
 11 | Avar
 12 | Azeri
 13 | Balkar
 14 | Basque
 15 | BedouinA
 16 | BedouinB
 17 | Belarusian
 18 | Borneo
 19 | Bulgarian
 20 | Buryat
 21 | Cambodian
 22 | Chechen
 23 | Chukchi
 24 | Chukchi1
 25 | Chuvash
 26 | Croatian
 27 | Cypriot
 28 | Czech
 29 | Dai
 30 | Daur
 31 | Dolgan
 32 | Druze
 33 | English
 34 | Eskimo_ChaplinSireniki
 35 | Eskimo_Naukan
 36 | Estonian
 37 | Even
 38 | Finnish
 39 | French
 40 | Georgian
 41 | German
 42 | Greek
 43 | Han
 44 | Hezhen
 45 | Hungarian
 46 | Icelandic
 47 | Iranian
 48 | Italian_North
 49 | Italian_South
 50 | Itelmen
 51 | Japanese
 52 | Jew_Ashkenazi
 53 | Jew_Georgian
 54 | Jew_Iranian
 55 | Jew_Iraqi
 56 | Jew_Libyan
 57 | Jew_Moroccan
 58 | Jew_Tunisian
 59 | Jew_Turkish
 60 | Jew_Yemenite
 61 | Jordanian
 62 | Kalmyk
 63 | Kinh
 64 | Korean
 65 | Koryak
 66 | Kumyk
 67 | Kurd
 68 | Kyrgyz
 69 | Lahu
 70 | Lebanese
 71 | Lezgin
 72 | Lithuanian
 73 | Maltese
 74 | Mansi
 75 | Miao
 76 | Mongol
 77 | Mongola
 78 | Mordovian
 79 | Naxi
 80 | Nganasan
 81 | Nogai
 82 | North_Ossetian
 83 | Norwegian
 84 | Orcadian
 85 | Oroqen
 86 | Ossetian
 87 | Palestinian
 88 | Polish
 89 | Russian
 90 | Saami.DG
 91 | Saami_WGA
 92 | Sardinian
 93 | Saudi
 94 | Scottish
 95 | Selkup
 96 | Semende
 97 | She
 98 | Sherpa.DG
 99 | Sicilian
100 | Spanish
101 | Spanish_North
102 | Syrian
103 | Tajik
104 | Thai
105 | Tibetan.DG
106 | Tu
107 | Tubalar
108 | Tujia
109 | Turkish
110 | Turkmen
111 | Tuvinian
112 | Ukrainian
113 | Ulchi
114 | Uygur
115 | Uzbek
116 | Xibo
117 | Yakut
118 | Yi
119 | Yukagir
120 | 


--------------------------------------------------------------------------------
/supp/poplist.txt:
--------------------------------------------------------------------------------
  1 | Abkhasian
  2 | Adygei
  3 | Albanian
  4 | Aleut
  5 | Aleut_Tlingit
  6 | Altaian
  7 | Ami
  8 | Armenian
  9 | Assyrian
 10 | Atayal
 11 | Avar
 12 | Azeri
 13 | Balkar
 14 | Basque
 15 | BedouinA
 16 | BedouinB
 17 | Belarusian
 18 | BolshoyOleniOstrov
 19 | Borneo
 20 | Bulgarian
 21 | Buryat
 22 | Cambodian
 23 | Canary_Islander
 24 | ChalmnyVarre
 25 | Chechen
 26 | Chukchi
 27 | Chukchi1
 28 | Chuvash
 29 | Croatian
 30 | Cypriot
 31 | Czech
 32 | Dai
 33 | Daur
 34 | Dolgan
 35 | Druze
 36 | English
 37 | Eskimo_ChaplinSireniki
 38 | Eskimo_Naukan
 39 | Estonian
 40 | Even
 41 | Finnish
 42 | French
 43 | Georgian
 44 | German
 45 | Greek
 46 | Han
 47 | Hezhen
 48 | Hungarian
 49 | Icelandic
 50 | Iranian
 51 | Irish
 52 | Irish_Ulster
 53 | Italian_North
 54 | Italian_South
 55 | Itelmen
 56 | Japanese
 57 | Jew_Ashkenazi
 58 | Jew_Georgian
 59 | Jew_Iranian
 60 | Jew_Iraqi
 61 | Jew_Libyan
 62 | Jew_Moroccan
 63 | Jew_Tunisian
 64 | Jew_Turkish
 65 | Jew_Yemenite
 66 | Jordanian
 67 | Kalmyk
 68 | Kinh
 69 | Korean
 70 | Koryak
 71 | Kumyk
 72 | Kurd
 73 | Kyrgyz
 74 | Lahu
 75 | Lebanese
 76 | Lebanese_Christian
 77 | Lebanese_Muslim
 78 | Levanluhta
 79 | Levanluhta_Outlier
 80 | Lezgin
 81 | LBK_EN
 82 | Lithuanian
 83 | Maltese
 84 | Mansi
 85 | MA1_HG.SG
 86 | Mbuti
 87 | Miao
 88 | Mongol
 89 | Mongola
 90 | Mordovian
 91 | Naxi
 92 | Nganasan
 93 | Nogai
 94 | North_Ossetian
 95 | Norwegian
 96 | Orcadian
 97 | Oroqen
 98 | Ossetian
 99 | Palestinian
100 | Polish
101 | Romanian
102 | Russian
103 | Saami.DG
104 | Saami_WGA
105 | Sardinian
106 | Saudi
107 | Scottish
108 | Selkup
109 | Semende
110 | She
111 | Sherpa.DG
112 | Shetlandic
113 | Sicilian
114 | Sorb
115 | Spanish
116 | Spanish_North
117 | Syrian
118 | Tajik
119 | Thai
120 | Tibetan.DG
121 | Tu
122 | Tubalar
123 | Tujia
124 | Turkish
125 | Turkmen
126 | Tuvinian
127 | Ukrainian
128 | Ulchi
129 | Uygur
130 | Uzbek
131 | WHG
132 | Xibo
133 | Yakut
134 | Yamnaya_Samara
135 | Yi
136 | Yukagir


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # popgen_course
 2 | A course with Jupyter Notebooks for Computational Population Genetics
 3 | 
 4 | by Stephan Schiffels
 5 | 
 6 | *Disclaimer: This is still work in progress.*
 7 | 
 8 | This repository contains several Jupyter Notebooks that I have used in the past for teaching various elements of population-genetic data analyses to students with no initial training in population genetics or Unix-based data analysis. It is definitely not yet fully self-contained and needs an experienced instructor to go through.
 9 | 
10 | Having said that, if you are reasonably experienced with Unix, it is possible to go through the chapters yourself. In that case, here are a few steps for setting up your environment to make these work:
11 | 
12 | 1. Install [Jupyter](https://jupyter.org) notebooks with [Bash extension](https://github.com/takluyver/bash_kernel). You will also need [Eigensoft](https://github.com/DReichLab/EIG) and [ADMIXTOOLS](https://github.com/DReichLab/AdmixTools).
13 | 2. Clone this repository in your home directory running `git clone https://github.com/stschiff/popgen_course.git`
14 | 3. Download the genotype data needed for these exercises from [here](https://oc.gnz.mpg.de/owncloud/index.php/s/dT9KzFhLfunk3Tb). In my notebooks, I assume that this data has been downloaded into the directory `/data/popgen_course`.
15 | 
16 | Having Jupyter installed, you can now simply open the Notebooks directly from within Jupyter, or you can access static versions of them here:
17 | 
18 | 1. [Getting Started (Bash)](https://nbviewer.jupyter.org/github/stschiff/popgen_course/blob/master/01_bashnb_getting_started.ipynb)
19 | 2. [Getting Started (Python)](https://nbviewer.jupyter.org/github/stschiff/popgen_course/blob/master/02_pynb_getting_started.ipynb)
20 | 3. [Principal Components Analysis (Bash)](https://nbviewer.jupyter.org/github/stschiff/popgen_course/blob/master/03_bashnb_smartpca.ipynb)
21 | 4. [Principal Components Analysis (Python)](https://nbviewer.jupyter.org/github/stschiff/popgen_course/blob/master/04_pynb_plotting_pca.ipynb)
22 | 5. [F Statistics (Python)](https://nbviewer.jupyter.org/github/stschiff/popgen_course/blob/master/05_pynb_fstatistics.ipynb)
23 | 
24 | 


--------------------------------------------------------------------------------
/population_frequencies.txt:
--------------------------------------------------------------------------------
  1 |       9 Abkhasian
  2 |      16 Adygei
  3 |       6 Albanian
  4 |       7 Aleut
  5 |       4 Aleut_Tlingit
  6 |       7 Altaian
  7 |      10 Ami
  8 |      10 Armenian
  9 |       9 Atayal
 10 |      10 Balkar
 11 |      29 Basque
 12 |      25 BedouinA
 13 |      19 BedouinB
 14 |      10 Belarusian
 15 |       6 BolshoyOleniOstrov
 16 |       9 Borneo
 17 |      10 Bulgarian
 18 |       8 Cambodian
 19 |       2 Canary_Islander
 20 |       2 ChalmnyVarre
 21 |       9 Chechen
 22 |      20 Chukchi
 23 |       3 Chukchi1
 24 |      10 Chuvash
 25 |      10 Croatian
 26 |       8 Cypriot
 27 |      10 Czech
 28 |      10 Dai
 29 |       9 Daur
 30 |       4 Dolgan
 31 |      39 Druze
 32 |      10 English
 33 |      10 Estonian
 34 |       9 Even
 35 |       8 Finnish
 36 |      32 French
 37 |      10 Georgian
 38 |      20 Greek
 39 |      43 Han
 40 |       8 Hezhen
 41 |      20 Hungarian
 42 |      12 Icelandic
 43 |       8 Iranian
 44 |      20 Italian_North
 45 |       1 Italian_South
 46 |       6 Itelmen
 47 |      29 Japanese
 48 |       7 Jew_Ashkenazi
 49 |       7 Jew_Georgian
 50 |       9 Jew_Iranian
 51 |       6 Jew_Iraqi
 52 |       9 Jew_Libyan
 53 |       6 Jew_Moroccan
 54 |       7 Jew_Tunisian
 55 |       8 Jew_Turkish
 56 |       8 Jew_Yemenite
 57 |       1 JK2065
 58 |       9 Jordanian
 59 |      10 Kalmyk
 60 |       8 Kinh
 61 |       6 Korean
 62 |       9 Koryak
 63 |       8 Kumyk
 64 |       9 Kyrgyz
 65 |       8 Lahu
 66 |      14 LBK_EN
 67 |       8 Lebanese
 68 |       2 Levanluhta
 69 |       9 Lezgin
 70 |      10 Lithuanian
 71 |       8 Maltese
 72 |       8 Mansi
 73 |      10 Miao
 74 |       6 Mongola
 75 |      10 Mordovian
 76 |       9 Naxi
 77 |      11 Nganasan
 78 |       9 Nogai
 79 |      11 Norwegian
 80 |      13 Orcadian
 81 |       9 Oroqen
 82 |      10 Ossetian
 83 |      38 Palestinian
 84 |      22 Russian
 85 |       2 Saami.DG
 86 |       1 Saami_WGA
 87 |      27 Sardinian
 88 |       8 Saudi
 89 |       4 Scottish
 90 |      10 Selkup
 91 |      10 Semende
 92 |      10 She
 93 |       2 Sherpa.DG
 94 |      11 Sicilian
 95 |      53 Spanish
 96 |       5 Spanish_North
 97 |       8 Syrian
 98 |       8 Tajik
 99 |      10 Thai
100 |       2 Tibetan.DG
101 |      10 Tu
102 |      22 Tubalar
103 |      10 Tujia
104 |      50 Turkish
105 |       7 Turkmen
106 |      10 Tuvinian
107 |       9 Ukrainian
108 |      25 Ulchi
109 |      10 Uygur
110 |      10 Uzbek
111 |       3 WHG
112 |       7 Xibo
113 |      20 Yakut
114 |       9 Yamnaya_Samara
115 |      10 Yi
116 |      19 Yukagir
117 | 


--------------------------------------------------------------------------------
/f3_outgroup_stats_Han.txt:
--------------------------------------------------------------------------------
 1 | result:        Han              Chuvash    Mbuti   0.233652       0.002072     112.782  502678
 2 | result:        Han             Albanian    Mbuti   0.215629       0.002029     106.291  501734
 3 | result:        Han             Armenian    Mbuti   0.213724       0.001963     108.882  504370
 4 | result:        Han            Bulgarian    Mbuti   0.216193       0.001979     109.266  504310
 5 | result:        Han                Czech    Mbuti   0.218060       0.002002     108.939  504089
 6 | result:        Han                Druze    Mbuti   0.209551       0.001919     109.205  510853
 7 | result:        Han              English    Mbuti   0.216959       0.001973     109.954  504161
 8 | result:        Han             Estonian    Mbuti   0.220730       0.002019     109.332  503503
 9 | result:        Han              Finnish    Mbuti   0.223447       0.002044     109.345  502217
10 | result:        Han               French    Mbuti   0.216623       0.001969     110.012  509613
11 | result:        Han             Georgian    Mbuti   0.214295       0.001935     110.721  503598
12 | result:        Han                Greek    Mbuti   0.215203       0.001984     108.465  507475
13 | result:        Han            Hungarian    Mbuti   0.217894       0.001999     109.004  507409
14 | result:        Han            Icelandic    Mbuti   0.218683       0.002015     108.553  504655
15 | result:        Han        Italian_North    Mbuti   0.215332       0.001978     108.854  507589
16 | result:        Han        Italian_South    Mbuti   0.211787       0.002271      93.265  492400
17 | result:        Han           Lithuanian    Mbuti   0.219615       0.002032     108.098  503681
18 | result:        Han              Maltese    Mbuti   0.210359       0.001956     107.542  503985
19 | result:        Han            Mordovian    Mbuti   0.223469       0.002008     111.296  503441
20 | result:        Han            Norwegian    Mbuti   0.218873       0.002023     108.197  504621
21 | result:        Han             Orcadian    Mbuti   0.217773       0.002014     108.115  504993
22 | result:        Han              Russian    Mbuti   0.223993       0.001995     112.274  506525
23 | result:        Han            Sardinian    Mbuti   0.213230       0.001980     107.711  508413
24 | result:        Han             Scottish    Mbuti   0.218489       0.002039     107.145  499784
25 | result:        Han             Sicilian    Mbuti   0.212272       0.001975     107.486  505477
26 | result:        Han        Spanish_North    Mbuti   0.215885       0.002029     106.383  500853
27 | result:        Han              Spanish    Mbuti   0.213869       0.001975     108.297  513648
28 | result:        Han            Ukrainian    Mbuti   0.218716       0.002007     108.950  503981
29 | result:        Han           Levanluhta    Mbuti   0.236252       0.002383      99.123  263049
30 | result:        Han   BolshoyOleniOstrov    Mbuti   0.247814       0.002177     113.849  457102
31 | result:        Han         ChalmnyVarre    Mbuti   0.233499       0.002304     101.345  366220
32 | result:        Han             Saami.DG    Mbuti   0.236198       0.002274     103.852  489038


--------------------------------------------------------------------------------
/f3_outgroup_stats_MA1.txt:
--------------------------------------------------------------------------------
 1 |  result:             MA1_HG.SG              Chuvash                Mbuti      0.243818       0.002349     103.781  350484
 2 |  result:             MA1_HG.SG             Albanian                Mbuti      0.236494       0.002296     103.008  344332
 3 |  result:             MA1_HG.SG             Armenian                Mbuti      0.231399       0.002264     102.229  349612
 4 |  result:             MA1_HG.SG            Bulgarian                Mbuti      0.237498       0.002281     104.103  349800
 5 |  result:             MA1_HG.SG                Czech                Mbuti      0.243224       0.002328     104.457  349553
 6 |  result:             MA1_HG.SG                Druze                Mbuti      0.226740       0.002197     103.193  359004
 7 |  result:             MA1_HG.SG              English                Mbuti      0.243135       0.002317     104.941  349321
 8 |  result:             MA1_HG.SG             Estonian                Mbuti      0.247065       0.002362     104.619  348861
 9 |  result:             MA1_HG.SG              Finnish                Mbuti      0.245684       0.002379     103.266  347208
10 |  result:             MA1_HG.SG               French                Mbuti      0.240235       0.002269     105.886  357842
11 |  result:             MA1_HG.SG             Georgian                Mbuti      0.232645       0.002253     103.243  349082
12 |  result:             MA1_HG.SG                Greek                Mbuti      0.236566       0.002280     103.757  355261
13 |  result:             MA1_HG.SG            Hungarian                Mbuti      0.241720       0.002313     104.483  355340
14 |  result:             MA1_HG.SG            Icelandic                Mbuti      0.244488       0.002386     102.481  350287
15 |  result:             MA1_HG.SG        Italian_North                Mbuti      0.236407       0.002273     104.002  354999
16 |  result:             MA1_HG.SG        Italian_South                Mbuti      0.230839       0.002767      83.427  321217
17 |  result:             MA1_HG.SG           Lithuanian                Mbuti      0.246864       0.002403     102.718  348656
18 |  result:             MA1_HG.SG              Maltese                Mbuti      0.230200       0.002259     101.903  347725
19 |  result:             MA1_HG.SG            Mordovian                Mbuti      0.245284       0.002346     104.571  350058
20 |  result:             MA1_HG.SG            Norwegian                Mbuti      0.243930       0.002301     106.031  350182
21 |  result:             MA1_HG.SG             Orcadian                Mbuti      0.243614       0.002320     105.008  351053
22 |  result:             MA1_HG.SG              Russian                Mbuti      0.245212       0.002298     106.698  355953
23 |  result:             MA1_HG.SG            Sardinian                Mbuti      0.231967       0.002264     102.449  355548
24 |  result:             MA1_HG.SG             Scottish                Mbuti      0.244598       0.002434     100.512  339441
25 |  result:             MA1_HG.SG             Sicilian                Mbuti      0.231141       0.002260     102.297  351028
26 |  result:             MA1_HG.SG        Spanish_North                Mbuti      0.238479       0.002426      98.319  341661
27 |  result:             MA1_HG.SG              Spanish                Mbuti      0.235386       0.002257     104.293  361951
28 |  result:             MA1_HG.SG            Ukrainian                Mbuti      0.243551       0.002345     103.881  348948
29 |  result:             MA1_HG.SG           Levanluhta                Mbuti      0.247640       0.003030      81.728  174148
30 |  result:             MA1_HG.SG   BolshoyOleniOstrov                Mbuti      0.256041       0.002624      97.561  305851
31 |  result:             MA1_HG.SG         ChalmnyVarre                Mbuti      0.249619       0.002862      87.212  239594
32 |  result:             MA1_HG.SG             Saami.DG                Mbuti      0.251530       0.002622      95.922  326072
33 | 


--------------------------------------------------------------------------------
/04_Rmd_plotting_pca.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Principal Component Plots"
  3 | output: html_document
  4 | editor_options: 
  5 |   chunk_output_type: console
  6 | ---
  7 | 
  8 | ```{r, echo=FALSE}
  9 | knitr::opts_chunk$set(message = FALSE)
 10 | ```
 11 | 
 12 | ```{r}
 13 | library(magrittr)
 14 | ```
 15 | 
 16 | For this chapter, you will need the PCA results that we ran in the last chapter. I have actually included the output files of my runs into this repository, so you can just use them if something didn't work in the previous chapter.
 17 | 
 18 | For making plots in R, one of the most popular libraries around is ggplot2. You can load it via:
 19 | 
 20 | ```{r}
 21 | library(ggplot2)
 22 | ```
 23 | 
 24 | Let's have a look at the main results file from smartpca:
 25 | 
 26 | ```{r, warning=F}
 27 | pcaDat <- readr::read_delim("pca.WestEurasia.evec", " ", trim_ws = TRUE)   # West Eurasian PCA run; TRUE spelled out because T is a reassignable variable
 28 | pcaDat2 <- readr::read_delim("pca.AllEurasia.evec", " ", trim_ws = TRUE)   # all-Eurasia PCA run, read the same way
 29 | ```
 30 | 
 31 | The first row contains the eigenvalues for the first 4 principal components (PCs), and all further rows contain the PC coordinates for each individual. The first column contains the name of each individual, the last column the population. To load this dataset in R, we used the read_delim() function from the readr package. We can now change the column headers:
 32 | 
 33 | ```{r}
 34 | colnames(pcaDat) <- colnames(pcaDat2) <- c("Name", "PC1", "PC2", "PC3", "PC4", "Group")  # chained assignment: both tables get the same six headers
 35 | ```
 36 | 
 37 | Looking at the data, we find that it is a tibble (a better data.frame), with each individual on one row, and the columns denoting the first 4 principal components. The last column contains the population for each individual:
 38 | 
 39 | ```{r}
 40 | pcaDat
 41 | ```
 42 | 
 43 | We can quickly plot the first two PCs for all individuals:
 44 | 
 45 | ```{r}
 46 | ggplot(  # plain scatter of the first two PCs, one point per individual
 47 |   data = pcaDat
 48 | ) + geom_point(mapping = aes(x = PC1, y = PC2))
 49 | ```
 50 | 
 51 | which is not very helpful, because we can't see where each population falls. We can highlight a few populations to get a bit more of a feeling:
 52 | 
 53 | ```{r}
 54 | ggplot() +
 55 |   geom_point(  # backdrop layer: every individual outside the four highlighted groups
 56 |     data = pcaDat %>% dplyr::filter(!(Group %in% c("Finnish", "Sardinian", "Armenian", "BedouinB"))), 
 57 |     aes(x = PC1, y = PC2)
 58 |   ) +
 59 |   geom_point(  # added second, so the highlighted groups are drawn on top, coloured by population
 60 |     data = pcaDat %>% dplyr::filter(Group %in% c("Finnish", "Sardinian", "Armenian", "BedouinB")), 
 61 |     aes(x = PC1, y = PC2, color = Group)
 62 |   )
 63 | ```
 64 | 
 65 | ## Showing all populations
 66 | 
 67 | OK, but how do we systematically show all the populations? There are too many of those to separate them all by different colors, or by different symbols, so we need to combine colours and symbols and use all the combinations of them to show all the populations.
 68 | 
 69 | ```{r}
 70 | populations <- readr::read_csv("data/popgen_course/WestEurasia.poplist.txt", col_names = FALSE)$X1  # headerless file, so readr names the first column X1
 71 | ```
 72 | 
 73 | ```{r, fig.height=10}
 74 | pcaDat %>%
 75 |   dplyr::filter(Group %in% populations) %>%  # keep only individuals from the listed populations
 76 |   ggplot() +
 77 |   geom_point(aes(
 78 |     x = PC1, y = PC2,
 79 |     color = Group, shape = Group  # colour and shape together tell the populations apart
 80 |   )) +
 81 |   scale_shape_manual(values = rep(0:18, length.out = length(populations))) +  # recycle shapes 0-18; sized from the poplist rather than a hard-coded 57
 82 |   theme(legend.position = "bottom")
 83 | ```
 84 | 
 85 | ## Adding ancient populations
 86 | 
 87 | Of course, until now we haven't yet included any of the actual ancient test individuals that we want to analyse.
 88 | 
 89 | We add the following ancient populations to this plot:
 90 | 
 91 | - Levanluhta (two individuals from Finland from the first millennium AD)
 92 | - BolshoyOleniOstrov (a group of 3500 year old individuals from Northern Russia).
 93 | - WHG (short for Western Hunter-Gatherers, about 8000 years ago)
 94 | - LBK_EN (short for Linearbandkeramik Early Neolithic, from about 6,000 years ago)
 95 | - Yamnaya_Samara, a late Neolithic population from the Russian Steppe, about 4,800 years ago.
 96 | 
 97 | The first two populations are from a publication on ancient Fennoscandian genomes ([Lamnidis et al. 2018](https://www.nature.com/articles/s41467-018-07483-5)), and are instructive to understand what PCA can be used for. The latter three populations are from two famous publications ([Lazaridis et al. 2014](https://www.nature.com/articles/nature13673) and [Haak et al. 2015](https://www.nature.com/articles/nature14317)). It can be shown that modern European genetic diversity is formed by a mix of three ancestries represented by these ancient groups. To highlight these ancient populations, we plot them in black and using different symbols. While we're at it, we should also add the population called "Saami.DG":
 98 | 
 99 | ```{r, fig.height=10}
100 | # Groups to highlight: the five ancient populations plus "Saami.DG", all drawn
101 | # in black on top of the populations from the population list.
102 | ancient_populations <- c("Levanluhta", "BolshoyOleniOstrov", "WHG", "LBK_EN", "Yamnaya_Samara", "Saami.DG") 
103 | 
104 | ggplot() +
105 |   # populations from the list: one colour/shape combination per group
106 |   geom_point(
107 |     data = pcaDat %>% dplyr::filter(Group %in% populations),
108 |     mapping = aes(x = PC1, y = PC2, color = Group, shape = Group)
109 |   ) + 
110 |   # individuals of the highlighted groups as black filled squares (shape 15)
111 |   geom_point(
112 |     data = pcaDat %>% dplyr::filter(Group %in% ancient_populations),
113 |     mapping = aes(x = PC1, y = PC2),
114 |     color = "black", shape = 15
115 |   ) + 
116 |   # one label per highlighted group, placed at the group's mean PC1/PC2 position
117 |   ggrepel::geom_label_repel(
118 |     data = pcaDat %>% dplyr::filter(Group %in% ancient_populations) %>%
119 |       dplyr::group_by(Group) %>%
120 |       dplyr::summarise(PC1 = mean(PC1), PC2 = mean(PC2)),
121 |     mapping = aes(x = PC1, y = PC2, label = Group)
122 |   ) +
123 |   # Shapes 0-14 only (presumably to keep shape 15 for the highlighted groups);
124 |   # the vector length follows the population list instead of a hard-coded 57.
125 |   scale_shape_manual(values = rep(0:14, length.out = length(populations))) +
126 |   theme(legend.position = "bottom")
127 | ```
128 | 
129 | OK, so what are we looking at? This is quite a rich plot, of course, and we won't discuss all the details here. I just want to highlight two things. First, you can see that most present-day Europeans are scattered in a relatively tight space in the center of a triangle spanned by the WHG on the lower left, LBK_EN on the lower right (seen from European points) and by Yamnaya_Samara (top). Indeed, a widely-accepted model for present-day Europeans assumes these three ancient source populations for all Europeans ([Lazaridis et al. 2014](https://www.nature.com/articles/nature13673) and [Haak et al. 2015](https://www.nature.com/articles/nature14317)).
130 | 
131 | The second thing that is noteworthy here is that present-day people from Northeastern Europe, such as Finns, Saami and other Uralic-speaking populations, are "dragged" towards the ancient samples from Bolshoy Oleni Ostrov. Indeed, a recent model published by us assumes that "Siberian" genetic ancestry entered Europe around 4000 years ago as a kind of fourth genetic component on top of the three other components discussed above, and is nowadays found in most Uralic speakers in Europe, including Finns, Saami and Estonians.
132 | 
133 | ## East-Eurasian PCA
134 | 
135 | We can make a similar plot using the all-Eurasian PCA that we have run:
136 | 
137 | ```{r}
138 | populations <- readr::read_csv("data/popgen_course/AllEurasia.poplist.txt", col_names = FALSE)[["X1"]]  # first (auto-named) column of the list file
139 | ```
140 | 
141 | ```{r, fig.height=12}
142 | # All-Eurasia PCA: populations from the list in colour, highlighted groups in black.
143 | # Uses `ancient_populations` as defined for the West-Eurasian plot.
144 | ggplot() +
145 |   # populations from the list: one colour/shape combination per group
146 |   geom_point(
147 |     data = pcaDat2 %>% dplyr::filter(Group %in% populations),
148 |     mapping = aes(x = PC1, y = PC2, color = Group, shape = Group)
149 |   ) + 
150 |   # individuals of the highlighted groups as black filled squares (shape 15)
151 |   geom_point(
152 |     data = pcaDat2 %>% dplyr::filter(Group %in% ancient_populations),
153 |     mapping = aes(x = PC1, y = PC2),
154 |     color = "black", shape = 15
155 |   ) + 
156 |   # one label per highlighted group, placed at the group's mean PC1/PC2 position
157 |   ggrepel::geom_label_repel(
158 |     data = pcaDat2 %>% dplyr::filter(Group %in% ancient_populations) %>%
159 |       dplyr::group_by(Group) %>%
160 |       dplyr::summarise(PC1 = mean(PC1), PC2 = mean(PC2)),
161 |     mapping = aes(x = PC1, y = PC2, label = Group)
162 |   ) +
163 |   # Recycle shapes 0-14 over the whole population list; deriving the length from
164 |   # `populations` avoids a hard-coded count (108) that must match the list file.
165 |   scale_shape_manual(values = rep(0:14, length.out = length(populations))) +
166 |   theme(legend.position = "bottom")
167 | ```
168 | 
169 | This PCA looks quite different. Here, we have all Western-Eurasian groups squished together on the left side of the plot, and on the right we have East-Asian populations. The plot roughly reflects geography, with Northern East-Asian people such as the Nganasan on the top-right, and Southern East-Asian people like the Taiwanese Ami on the lower right. Here we can now see that the ancient samples from Russia and Finland, as well as present-day Uralic populations, are actually distributed between East and West, in contrast to most other Europeans. This confirms that these groups in Europe have quite a distinctive East-Asian genetic ancestry, and we found that it is best represented by the Nganasan ([Lamnidis et al. 2018](https://www.nature.com/articles/s41467-018-07483-5)).
170 | 


--------------------------------------------------------------------------------
/03_Rmd_smartpca.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Principal Components Analysis (PCA)"
  3 | output: html_document
  4 | editor_options: 
  5 |   chunk_output_type: console
  6 | ---
  7 | 
  8 | ```{r, echo=FALSE}
  9 | knitr::opts_chunk$set(message = FALSE)  # default for all chunks: do not show messages in the rendered document
 10 | ```
 11 | 
 12 | ```{r}
 13 | library(magrittr)
 14 | ```
 15 | 
 16 | Principal components analysis (PCA) is one of the most useful techniques to visualise genetic diversity in a dataset. The methodology is not restricted to genetic data, but in general allows breaking down high-dimensional datasets to two or more dimensions for visualisation in a two-dimensional space.
 17 | 
 18 | ## Genotype Data
 19 | 
 20 | This lesson is also our first contact with the genotype data used in this and most of the following lessons. The dataset that we will work with contains 1,340 individuals, each represented by 593,124 single nucleotide polymorphisms (SNPs). Those SNPs have exactly two different alleles, and each individual has one of four possible values at each genotype: homozygous reference, heterozygous, homozygous alternative, or missing. Those four values are encoded 2, 1, 0 and 9 respectively.
 21 | 
 22 | The data is laid out as a matrix, with columns indicating individuals, and rows indicating SNPs. The data itself comes in the so-called \"EIGENSTRAT\" format, which is defined in the [Eigensoft package](https://github.com/DReichLab/EIG) used by many tools used in this workshop. In this format, a genotype dataset consists of three files, usually with the following file endings:
 23 | 
 24 | * `*.snp`: The file containing the SNP positions. It consists of six columns: SNP-name, chromosome, genetic positions, physical position, reference allele, alternative allele.
 25 | * `*.ind`: The file containing the names of the individuals. It consists of three columns: Individual Name, Sex (encoded as M(ale), F(emale), or U(nknown)), and population name.
 26 | * `*.geno`: The file containing the genotype matrix, with individuals laid out from left to right, and SNP positions laid out from top to bottom.
 27 | 
 28 | In the following, we will explore the files using R in this Rmarkdown document.
 29 | 
 30 | The data that we want to analyse is stored at `data/popgen_course`. Let's list the contents of that directory:
 31 | 
 32 | ```{r}
 33 | list.files("data/popgen_course/")  # names of the files in the course data directory
 34 | ```
 35 | 
 36 | Let's explore those files a bit. Here are the first 20 individuals:
 37 | 
 38 | ```{r}
 39 | # Read the individual table from the space-delimited *.ind file.
 40 | # Column names are supplied explicitly through `col_names`.
 41 | individuals <- readr::read_delim(
 42 |    file = "data/popgen_course/genotypes_small.ind",
 43 |    delim = " ",
 44 |    trim_ws = TRUE,
 45 |    col_names = c(
 46 |       "name", "sex", "population"
 47 |    )
 48 | )
 49 | 
 50 | head(individuals, n = 20)
 51 | ```
 52 | 
 53 | And here the first 20 SNP rows:
 54 | 
 55 | ```{r}
 56 | # Read the SNP table from the space-delimited *.snp file; column names are supplied here.
 57 | snps <- readr::read_delim(
 58 |    "data/popgen_course/genotypes_small.snp", 
 59 |    delim = " ", trim_ws = T,
 60 |    col_names = c(
 61 |       "SNP_name", "chromosome", 
 62 |       "genetic_position", "physical_position", 
 63 |       "reference_allele", "alternative_allele"
 64 |    )
 65 | )
 66 | 
 67 | # Show the first 20 SNP rows, as announced in the text above.
 68 | snps %>% head(20)
 69 | ```
 70 | 
 71 | And here are the first 20 genotypes of the first 50 individuals:
 72 | 
 73 | ```{r}
 74 | # Only the first 20 lines of the genotype file are read.
 75 | geno <- readr::read_lines(file = "data/popgen_course/genotypes_small.geno", n_max = 20)
 76 | 
 77 | # Characters 1-50 of each line, i.e. the genotypes of the first 50 individuals
 78 | # (the text above describes one genotype column per individual).
 79 | substr(geno, 1, 50)
 80 | ```
 81 | 
 82 | Counting how many individuals and SNPs there are:
 83 | 
 84 | ```{r}
 85 | nrow(individuals)  # number of rows read from the *.ind file
 86 | nrow(snps)         # number of rows read from the *.snp file
 87 | ```
 88 | 
 89 | And now we check that the first row of the `*.geno` file indeed contains one column (character) per individual:
 90 | 
 91 | ```{r}
 92 | nchar(geno[1])  # number of characters in the first genotype line; compare with nrow(individuals)
 93 | ```
 94 | 
 95 | Now counting the number of rows in the `*.geno`-file (this takes a few seconds, as the file is several hundred MB large):
 96 | 
 97 | ```{r}
 98 | as.integer(R.utils::countLines("data/popgen_course/genotypes_small.geno"))  # line count of the *.geno file as a plain integer
 99 | ```
100 | 
101 | Great, the number of rows and columns agrees with the numbers indicated in the `*.ind` and `*.snp` file! Now we're counting how many different populations there are. Let's first see the first 10 populations in the sorted list, alongside the number of individuals in each group:
102 | 
103 | ```{r}
104 | # Individuals per population: group by population, then count the rows of each group.
105 | dplyr::group_by(individuals, population) %>%
106 |    dplyr::count()
107 | ```
108 | 
109 | ## How PCA works
110 | 
111 | To understand how PCA works, consider a single individual and its representation by its 593,124 markers. Formally, each individual is a point in a 593,124-dimensional space, where each dimension can take only the three possible genotypes indicated above, or have missing data. To visualise this high-dimensional dataset, we would like to project it down to two dimensions. But just as there are many ways to project the shadow of a three-dimensional object onto a two-dimensional plane, there are many (and even more) ways to project a 593,124-dimensional cloud of points to two dimensions. What PCA does is figure out the \"best\" way to do this projection in order to visualise the major components of variance in the data.
112 | 
113 | For actually running the analysis, we use a software called `smartPCA` from the [Eigensoft package](https://github.com/DReichLab/EIG). As many other tools from this and related packages, `smartPCA` reads in a parameter file which specifies its input and output files and options. In our case, we want the parameter file to have the following content:
114 | 
115 | ```
116 | genotypename: data/popgen_course/genotypes_small.geno
117 | snpname: data/popgen_course/genotypes_small.snp
118 | indivname: data/popgen_course/genotypes_small.ind
119 | evecoutname: pca.WestEurasia.evec
120 | evaloutname: pca.WestEurasia.eval
121 | poplistname: data/popgen_course/WestEurasia.poplist.txt
122 | lsqproject: YES
123 | numoutevec: 4
124 | numthreads: 1
125 | ```
126 | 
127 | Here, the first three parameters specify the input genotype files. The next two rows specify two output file names, typically with ending `*.evec` and `*.eval`. The parameter line beginning with `poplistname` contains a file with a list of populations used for calculating the principal components (see below). The option `lsqproject` is important for applications including ancient DNA with lots of missing data, which I will not elaborate on. For the purpose of this workshop, you should use `lsqproject: YES`. The next option `numoutevec` specifies the number of principal components that we compute, the last option `numthreads` the number of CPUs to use for this run. We use just one since we're working together on the same computer, so cannot afford everyone running on lots of CPUs.
128 | 
129 | ## Population lists vs. Projection
130 | 
131 | The parameter named `poplistname` is a very crucial one. It specifies the populations whose individuals are used to calculate the principal components. Why not just all of them, you ask? For several reasons: First, there are simply too many of them and we don't want to use all of them, since the computation would take too long. More importantly, however, we generally try to avoid using ancient samples to compute principal components, to avoid specific ancient-DNA related artefacts affecting the computation. Finally, the list of populations to use for PCA should be informed by your question. If you're investigating African population structure, it makes no sense to put Asian or European individuals in your population list, since then the main axes of genetic differentiation would not be inside of Africa, but between Africans and Non-Africans.
132 | 
133 | So what happens to individuals that are not in populations listed in the population list? Well, fortunately, they are not just ignored, but \"projected\". This means that after the principal components have been computed, *all* individuals (not just the ones in the list) are projected onto these principal components. That way, we can visualise ancient populations in the context of modern genetic variation. While that may sound a bit problematic at first (some variation in ancient populations is not represented well by modern populations), it nevertheless turns out to be one of the most useful tools for this purpose. The advantage of keeping ancient-DNA artefacts and batch effects from affecting the visualisation outweighs the disadvantage of missing some private genetic variation components in the ancient populations themselves. Of course, that argument breaks down once the analysed populations become too ancient and detached from modern genetic variation. But for our purposes it will work just fine.
134 | 
135 | For this workshop, I prepared two population lists:
136 | 
137 | ```
138 | data/popgen_course/WestEurasia.poplist.txt
139 | data/popgen_course/AllEurasia.poplist.txt
140 | ```
141 | 
142 | As you can tell from the names of the files, they specify two sets of modern populations representing West Eurasia or all of Europe and Asia, respectively.
143 | 
144 | I recommend to look through both of the population lists and google some population names that you don't recognise to get a feeling for the ethnic groups represented here.
145 | 
146 | ## Running `smartPCA`
147 | 
148 | Now go ahead and open a new text file using your Jupyter Browser, you can name it anything you like. For the sake of a concrete name, let's call it `pca.WestEurasia.params.txt`. Text files in Jupyter are opened in a text editor, so you can then simply copy-paste the above lines into the new file.
149 | 
150 | ```{r}
151 | # Write the smartPCA parameter file. The file name is passed positionally because
152 | # readr >= 1.4.0 deprecates the `path` argument name in favour of `file`.
153 | readr::write_lines(c(
154 |       "genotypename: data/popgen_course/genotypes_small.geno",
155 |       "snpname: data/popgen_course/genotypes_small.snp",
156 |       "indivname: data/popgen_course/genotypes_small.ind",
157 |       "evecoutname: pca.WestEurasia.evec",
158 |       "evaloutname: pca.WestEurasia.eval",
159 |       "poplistname: data/popgen_course/WestEurasia.poplist.txt",
160 |       "lsqproject: YES",
161 |       "numoutevec: 4",
162 |       "numthreads: 1"
163 |    ), "pca.WestEurasia.params.txt")
164 | ```
165 | 
166 | Let's see whether it worked, by printing out the contents of that file into your notebook:
167 | 
168 | ```{r}
169 | # Read the parameter file back; the resulting character vector is auto-printed.
170 | "pca.WestEurasia.params.txt" %>%
171 |    readr::read_lines()
172 | ```
173 | 
174 | Great, so that's our parameter file for running `smartPCA`.
175 | 
176 | **Note:** We specified two output files in our parameter file, here called `pca.WestEurasia.evec` and `pca.WestEurasia.eval`. You can actually put any names you want in there. But beware of relative vs. absolute paths. File names starting with `/` are considered \"absolute\", that is, taken to go from the root of the file system. In contrast, filenames not starting with `/` are considered \"relative\" to the current working directory. If you forgot which directory you're in, run `pwd`.
177 | 
178 | **Note:** The option `poplistname` is a crucial one. Here you need to specify which populations are used to compute the eigenvectors of the principal components analysis. In our case, I have prepared two population list files: `data/popgen_course/WestEurasia.poplist.txt` and `data/popgen_course/AllEurasia.poplist.txt`. Pick one of the two to carry on.
179 | 
180 | Good, now we can run `smartPCA`. To do that, it's more convenient to use the terminal than an Rmarkdown file. So open a terminal and run
181 | 
182 | ```
183 | smartpca -p pca.WestEurasia.params.txt
184 | ```
185 | 
186 | This will typically run for about 30 minutes and output lots of logging output to the screen.
187 | 
188 | In a similar manner we can prepare a parameter file for the AllEurasia population list. This is how it should look:
189 | 
190 | ```
191 | genotypename: data/popgen_course/genotypes_small.geno
192 | snpname: data/popgen_course/genotypes_small.snp
193 | indivname: data/popgen_course/genotypes_small.ind
194 | evecoutname: pca.AllEurasia.evec
195 | evaloutname: pca.AllEurasia.eval
196 | poplistname: data/popgen_course/AllEurasia.poplist.txt
197 | lsqproject: YES
198 | numoutevec: 4
199 | numthreads: 1
200 | ```
201 | 
202 | And similar to the command above, we can run pca on the AllEurasia population list via:
203 | 
204 | ```
205 | smartpca -p pca.AllEurasia.params.txt
206 | ```
207 | 
208 | which will run slightly longer than the first one because there are more populations.
209 | 


--------------------------------------------------------------------------------
/pca.WestEurasia.eval:
--------------------------------------------------------------------------------
  1 |     6.288558
  2 |     3.094820
  3 |     2.692703
  4 |     2.010356
  5 |     1.742554
  6 |     1.689588
  7 |     1.626518
  8 |     1.616861
  9 |     1.590569
 10 |     1.589779
 11 |     1.580970
 12 |     1.570292
 13 |     1.562215
 14 |     1.556105
 15 |     1.550321
 16 |     1.543074
 17 |     1.540315
 18 |     1.534597
 19 |     1.524939
 20 |     1.524176
 21 |     1.520778
 22 |     1.511707
 23 |     1.505727
 24 |     1.501742
 25 |     1.494597
 26 |     1.489205
 27 |     1.484356
 28 |     1.477644
 29 |     1.469235
 30 |     1.466139
 31 |     1.464872
 32 |     1.453282
 33 |     1.439786
 34 |     1.436309
 35 |     1.424679
 36 |     1.422132
 37 |     1.416346
 38 |     1.411205
 39 |     1.407204
 40 |     1.402511
 41 |     1.399908
 42 |     1.397946
 43 |     1.395569
 44 |     1.390360
 45 |     1.385084
 46 |     1.378586
 47 |     1.377115
 48 |     1.374029
 49 |     1.370217
 50 |     1.362597
 51 |     1.360695
 52 |     1.359427
 53 |     1.354377
 54 |     1.350525
 55 |     1.344736
 56 |     1.343233
 57 |     1.339959
 58 |     1.338966
 59 |     1.335077
 60 |     1.330144
 61 |     1.323053
 62 |     1.320174
 63 |     1.317857
 64 |     1.315216
 65 |     1.310151
 66 |     1.308080
 67 |     1.305296
 68 |     1.302001
 69 |     1.300860
 70 |     1.289421
 71 |     1.287915
 72 |     1.285110
 73 |     1.279333
 74 |     1.275902
 75 |     1.271351
 76 |     1.269675
 77 |     1.267124
 78 |     1.264680
 79 |     1.262351
 80 |     1.259080
 81 |     1.257861
 82 |     1.255196
 83 |     1.252261
 84 |     1.250030
 85 |     1.247348
 86 |     1.245184
 87 |     1.242175
 88 |     1.239379
 89 |     1.238133
 90 |     1.236569
 91 |     1.232437
 92 |     1.230241
 93 |     1.228187
 94 |     1.225265
 95 |     1.223943
 96 |     1.222205
 97 |     1.219820
 98 |     1.217824
 99 |     1.215598
100 |     1.213455
101 |     1.209457
102 |     1.208494
103 |     1.205137
104 |     1.203316
105 |     1.201609
106 |     1.200145
107 |     1.196033
108 |     1.192887
109 |     1.192256
110 |     1.191376
111 |     1.188856
112 |     1.187536
113 |     1.185889
114 |     1.184822
115 |     1.181227
116 |     1.178798
117 |     1.175043
118 |     1.173146
119 |     1.171438
120 |     1.168925
121 |     1.166938
122 |     1.165445
123 |     1.164524
124 |     1.163296
125 |     1.161620
126 |     1.160886
127 |     1.159673
128 |     1.158730
129 |     1.155633
130 |     1.153882
131 |     1.151985
132 |     1.151777
133 |     1.149913
134 |     1.149450
135 |     1.147317
136 |     1.142969
137 |     1.142523
138 |     1.141568
139 |     1.138510
140 |     1.136462
141 |     1.134942
142 |     1.134773
143 |     1.133566
144 |     1.132394
145 |     1.130553
146 |     1.128126
147 |     1.127649
148 |     1.126379
149 |     1.125571
150 |     1.123615
151 |     1.121786
152 |     1.119842
153 |     1.119581
154 |     1.118005
155 |     1.116301
156 |     1.115939
157 |     1.114271
158 |     1.113468
159 |     1.111258
160 |     1.110785
161 |     1.109414
162 |     1.108017
163 |     1.105467
164 |     1.104533
165 |     1.103341
166 |     1.102456
167 |     1.101475
168 |     1.099997
169 |     1.098662
170 |     1.098401
171 |     1.095494
172 |     1.094875
173 |     1.094238
174 |     1.093169
175 |     1.091754
176 |     1.090332
177 |     1.089938
178 |     1.089230
179 |     1.087557
180 |     1.087194
181 |     1.086377
182 |     1.084882
183 |     1.084031
184 |     1.083202
185 |     1.082490
186 |     1.081516
187 |     1.078116
188 |     1.076987
189 |     1.075117
190 |     1.074305
191 |     1.074135
192 |     1.072921
193 |     1.071640
194 |     1.071192
195 |     1.069853
196 |     1.069550
197 |     1.067037
198 |     1.066036
199 |     1.065113
200 |     1.063452
201 |     1.062919
202 |     1.061595
203 |     1.060710
204 |     1.060346
205 |     1.059358
206 |     1.059034
207 |     1.056998
208 |     1.056422
209 |     1.055168
210 |     1.054756
211 |     1.054162
212 |     1.052285
213 |     1.051447
214 |     1.050835
215 |     1.050287
216 |     1.048713
217 |     1.047416
218 |     1.046505
219 |     1.046476
220 |     1.045557
221 |     1.044029
222 |     1.042965
223 |     1.042316
224 |     1.041413
225 |     1.040081
226 |     1.038227
227 |     1.037778
228 |     1.036910
229 |     1.035252
230 |     1.034431
231 |     1.034115
232 |     1.032947
233 |     1.031785
234 |     1.031263
235 |     1.030600
236 |     1.030253
237 |     1.028534
238 |     1.027096
239 |     1.026720
240 |     1.025726
241 |     1.025525
242 |     1.025322
243 |     1.024046
244 |     1.023472
245 |     1.022728
246 |     1.021405
247 |     1.021292
248 |     1.020123
249 |     1.019659
250 |     1.018497
251 |     1.017623
252 |     1.016996
253 |     1.016558
254 |     1.015365
255 |     1.014977
256 |     1.014861
257 |     1.013228
258 |     1.012071
259 |     1.011203
260 |     1.010988
261 |     1.010408
262 |     1.009380
263 |     1.007719
264 |     1.006532
265 |     1.006188
266 |     1.005178
267 |     1.004976
268 |     1.004504
269 |     1.003641
270 |     1.003191
271 |     1.002226
272 |     1.001685
273 |     1.000463
274 |     1.000337
275 |     0.999162
276 |     0.999093
277 |     0.998314
278 |     0.996493
279 |     0.996077
280 |     0.995453
281 |     0.994081
282 |     0.993268
283 |     0.992901
284 |     0.992295
285 |     0.991612
286 |     0.990978
287 |     0.989752
288 |     0.989216
289 |     0.988180
290 |     0.987540
291 |     0.986679
292 |     0.986364
293 |     0.986057
294 |     0.984414
295 |     0.983226
296 |     0.982901
297 |     0.981817
298 |     0.981467
299 |     0.980461
300 |     0.980450
301 |     0.978894
302 |     0.978277
303 |     0.977965
304 |     0.976690
305 |     0.976150
306 |     0.975217
307 |     0.974853
308 |     0.974546
309 |     0.973920
310 |     0.972813
311 |     0.972662
312 |     0.971645
313 |     0.970437
314 |     0.970208
315 |     0.969803
316 |     0.968331
317 |     0.967328
318 |     0.967162
319 |     0.966423
320 |     0.965545
321 |     0.965118
322 |     0.964917
323 |     0.964216
324 |     0.964092
325 |     0.963881
326 |     0.962568
327 |     0.962315
328 |     0.961592
329 |     0.961002
330 |     0.959286
331 |     0.958323
332 |     0.957938
333 |     0.957303
334 |     0.956762
335 |     0.955824
336 |     0.955388
337 |     0.954092
338 |     0.953812
339 |     0.952941
340 |     0.952400
341 |     0.951429
342 |     0.951262
343 |     0.950638
344 |     0.949361
345 |     0.949126
346 |     0.948465
347 |     0.948398
348 |     0.947911
349 |     0.946306
350 |     0.945961
351 |     0.945585
352 |     0.945162
353 |     0.944826
354 |     0.944243
355 |     0.942899
356 |     0.941973
357 |     0.941903
358 |     0.941103
359 |     0.940600
360 |     0.940065
361 |     0.939547
362 |     0.938725
363 |     0.938459
364 |     0.937967
365 |     0.937326
366 |     0.936784
367 |     0.935674
368 |     0.934585
369 |     0.934092
370 |     0.933774
371 |     0.932926
372 |     0.932327
373 |     0.932100
374 |     0.931712
375 |     0.931148
376 |     0.930232
377 |     0.929202
378 |     0.928810
379 |     0.928522
380 |     0.927521
381 |     0.927328
382 |     0.926623
383 |     0.926042
384 |     0.925250
385 |     0.924349
386 |     0.923473
387 |     0.923142
388 |     0.922366
389 |     0.921817
390 |     0.920906
391 |     0.920704
392 |     0.920235
393 |     0.919048
394 |     0.918843
395 |     0.917704
396 |     0.917045
397 |     0.916487
398 |     0.916337
399 |     0.916258
400 |     0.914903
401 |     0.914464
402 |     0.914041
403 |     0.913881
404 |     0.913094
405 |     0.912843
406 |     0.911970
407 |     0.911049
408 |     0.910202
409 |     0.909671
410 |     0.909440
411 |     0.908489
412 |     0.908079
413 |     0.907741
414 |     0.907548
415 |     0.906737
416 |     0.906195
417 |     0.905644
418 |     0.905111
419 |     0.904308
420 |     0.904040
421 |     0.903366
422 |     0.903223
423 |     0.902615
424 |     0.901622
425 |     0.901467
426 |     0.901179
427 |     0.900098
428 |     0.899927
429 |     0.899792
430 |     0.898742
431 |     0.897845
432 |     0.897437
433 |     0.896148
434 |     0.896008
435 |     0.895563
436 |     0.895410
437 |     0.894941
438 |     0.894237
439 |     0.893422
440 |     0.892932
441 |     0.891691
442 |     0.891632
443 |     0.891039
444 |     0.890625
445 |     0.889614
446 |     0.888892
447 |     0.888194
448 |     0.887807
449 |     0.887436
450 |     0.887371
451 |     0.886883
452 |     0.886195
453 |     0.885164
454 |     0.884265
455 |     0.884077
456 |     0.883718
457 |     0.883042
458 |     0.882538
459 |     0.882156
460 |     0.881516
461 |     0.881069
462 |     0.880161
463 |     0.879962
464 |     0.879528
465 |     0.878992
466 |     0.878825
467 |     0.878309
468 |     0.877843
469 |     0.877039
470 |     0.876641
471 |     0.876319
472 |     0.875079
473 |     0.874339
474 |     0.874230
475 |     0.873874
476 |     0.872857
477 |     0.872205
478 |     0.871989
479 |     0.871395
480 |     0.871152
481 |     0.870044
482 |     0.869753
483 |     0.869425
484 |     0.868858
485 |     0.867088
486 |     0.866709
487 |     0.866390
488 |     0.865938
489 |     0.865212
490 |     0.864425
491 |     0.864098
492 |     0.863748
493 |     0.863379
494 |     0.862923
495 |     0.862344
496 |     0.861429
497 |     0.860924
498 |     0.860355
499 |     0.859607
500 |     0.859477
501 |     0.858994
502 |     0.858649
503 |     0.857258
504 |     0.857078
505 |     0.856810
506 |     0.856177
507 |     0.855982
508 |     0.855824
509 |     0.855144
510 |     0.854475
511 |     0.853987
512 |     0.853647
513 |     0.852613
514 |     0.851769
515 |     0.851541
516 |     0.851137
517 |     0.850515
518 |     0.850446
519 |     0.849870
520 |     0.849277
521 |     0.848720
522 |     0.848360
523 |     0.847259
524 |     0.847193
525 |     0.846994
526 |     0.845861
527 |     0.845364
528 |     0.845043
529 |     0.844380
530 |     0.843360
531 |     0.842825
532 |     0.842303
533 |     0.841749
534 |     0.840919
535 |     0.840577
536 |     0.840009
537 |     0.839850
538 |     0.839015
539 |     0.838237
540 |     0.837920
541 |     0.837112
542 |     0.836878
543 |     0.836296
544 |     0.835706
545 |     0.835160
546 |     0.834576
547 |     0.834100
548 |     0.833720
549 |     0.832415
550 |     0.832231
551 |     0.832171
552 |     0.831552
553 |     0.831134
554 |     0.830803
555 |     0.829452
556 |     0.829250
557 |     0.828797
558 |     0.828511
559 |     0.828037
560 |     0.827513
561 |     0.827326
562 |     0.826752
563 |     0.826086
564 |     0.825650
565 |     0.824973
566 |     0.824409
567 |     0.823716
568 |     0.823206
569 |     0.822073
570 |     0.821757
571 |     0.821173
572 |     0.820873
573 |     0.820026
574 |     0.819839
575 |     0.818535
576 |     0.817973
577 |     0.817111
578 |     0.816751
579 |     0.816403
580 |     0.815699
581 |     0.815324
582 |     0.815057
583 |     0.814641
584 |     0.813534
585 |     0.812941
586 |     0.812452
587 |     0.811794
588 |     0.811542
589 |     0.810991
590 |     0.810724
591 |     0.809677
592 |     0.809547
593 |     0.808589
594 |     0.808439
595 |     0.807674
596 |     0.806734
597 |     0.806092
598 |     0.805974
599 |     0.805148
600 |     0.804800
601 |     0.804496
602 |     0.803970
603 |     0.803488
604 |     0.802755
605 |     0.802544
606 |     0.801909
607 |     0.800832
608 |     0.800408
609 |     0.799845
610 |     0.799108
611 |     0.798765
612 |     0.797802
613 |     0.797240
614 |     0.796978
615 |     0.796476
616 |     0.796009
617 |     0.795472
618 |     0.795240
619 |     0.794519
620 |     0.794289
621 |     0.793201
622 |     0.792818
623 |     0.792018
624 |     0.791806
625 |     0.791236
626 |     0.790462
627 |     0.789405
628 |     0.789034
629 |     0.788772
630 |     0.788432
631 |     0.787917
632 |     0.787389
633 |     0.786096
634 |     0.785929
635 |     0.785572
636 |     0.785046
637 |     0.784438
638 |     0.784134
639 |     0.783391
640 |     0.783121
641 |     0.782414
642 |     0.781586
643 |     0.780389
644 |     0.780236
645 |     0.779400
646 |     0.778745
647 |     0.778569
648 |     0.777887
649 |     0.776693
650 |     0.776351
651 |     0.776070
652 |     0.774920
653 |     0.774418
654 |     0.774176
655 |     0.773834
656 |     0.773016
657 |     0.771908
658 |     0.771459
659 |     0.770713
660 |     0.769870
661 |     0.769206
662 |     0.768963
663 |     0.767738
664 |     0.767465
665 |     0.766510
666 |     0.765908
667 |     0.765482
668 |     0.765061
669 |     0.764236
670 |     0.764026
671 |     0.763117
672 |     0.761653
673 |     0.761508
674 |     0.761167
675 |     0.760186
676 |     0.759873
677 |     0.759109
678 |     0.757924
679 |     0.757252
680 |     0.756537
681 |     0.756182
682 |     0.755559
683 |     0.754790
684 |     0.753408
685 |     0.752626
686 |     0.751722
687 |     0.751316
688 |     0.750610
689 |     0.750227
690 |     0.749308
691 |     0.748432
692 |     0.747693
693 |     0.747260
694 |     0.746210
695 |     0.744726
696 |     0.744155
697 |     0.743165
698 |     0.742642
699 |     0.742262
700 |     0.740150
701 |     0.739781
702 |     0.738910
703 |     0.738168
704 |     0.737248
705 |     0.736519
706 |     0.735534
707 |     0.735123
708 |     0.734035
709 |     0.733200
710 |     0.731250
711 |     0.729438
712 |     0.727373
713 |     0.727101
714 |     0.726889
715 |     0.724111
716 |     0.721821
717 |     0.718911
718 |     0.694921
719 |     0.685380
720 |    -0.000000
721 | 


--------------------------------------------------------------------------------
/01_bashnb_getting_started.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Introduction to Jupyter"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## Basic Usage"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "When you first access Jupyter, you will get a file browser view of your home directory on the server. In the beginning, your home directory will be empty, and will be populated with notebooks and files throughout this workshop.\n",
 22 |     "\n",
 23 |     "To create a new text file, click on New (in the upper right corner) and then Text File, which opens a text editor within your browser. You can now add content into the file, or edit existing content and save. The filename can be changed by clicking into the Filename on top. You can now go back to your file browser window and update using the button with the two arrows in the upper right corner, and you should see your text file saved in your home directory.\n",
 24 |     "\n",
 25 |     "You can also use Jupyter to open a Terminal within the browser: Click on New and then Terminal, which will open a terminal window in a separate browser tab. You can enter Unix Bash commands to change directories, view files or execute programs (as we will learn below).\n",
 26 |     "\n",
 27 |     "Finally, you can create new Folders by clicking on New and then Folder. To rename the new folder, click on the checkbox beside the new folder, and click the Rename button on top, which appeared. To change into the new folder, click on it. To move back, click on the parent folder appearing on top of the file browser."
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "markdown",
 32 |    "metadata": {},
 33 |    "source": [
 34 |     "***Exercise:*** Create a new folder called hello, and a text file within that folder using Jupyter. Name that text file hello.txt and fill it with arbitrary content, such as `Hello, World!`. Then open a terminal and output the contents of the new text file by typing `cat hello/hello.txt` followed by ENTER."
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "markdown",
 39 |    "metadata": {},
 40 |    "source": [
 41 |     "***Note:*** While the Jupyter terminal and Jupyter Text Files are different ways to interact with the server, both access the same file system. So files created with the Text Editor are saved in your home directory, and can be accessed via the terminal, and vice versa: Files created via the Terminal can be accessed via the Text Editor, by simply clicking on them in the Jupyter File Browser."
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "markdown",
 46 |    "metadata": {},
 47 |    "source": [
 48 |     "## Notebooks"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "markdown",
 53 |    "metadata": {},
 54 |    "source": [
 55 |     "Notebooks can be loaded for different underlying kernels: bash, python and R. Notebooks are useful to document interactive data analysis. They combine code cells with markdown cells. A markdown cell can contain text, math or headings. "
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "markdown",
 60 |    "metadata": {},
 61 |    "source": [
 62 |     "You can create new bash notebooks using the \"New\" Dropdown list in the Jupyter File Browser and then selecting \"Bash\". Notebooks open if you click on them."
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "markdown",
 67 |    "metadata": {},
 68 |    "source": [
 69 |     "In Jupyter notebooks, you work with *Cells*. You can create new cells, or insert them above or below existing cells using the menu items in the `Insert` menu. Use the dropdown list in the command bar in Jupyter to change the type of the cell. The two main types we're going to use are `Markdown` and `Code`. Markdown cells are useful for documenting stuff, Code cells for running code. Markdown cells can be edited by double-clicking into them. Render them by pressing Shift-Enter."
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "markdown",
 74 |    "metadata": {},
 75 |    "source": [
 76 |     "Code cells are used to enter and execute code. Let's look at some examples."
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "markdown",
 81 |    "metadata": {},
 82 |    "source": [
 83 |     "We can first check which directory we are in, using the `pwd` (=Present Working Directory) command:"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "code",
 88 |    "execution_count": 2,
 89 |    "metadata": {},
 90 |    "outputs": [
 91 |     {
 92 |      "name": "stdout",
 93 |      "output_type": "stream",
 94 |      "text": [
 95 |       "/home/stephan/popgen_course\n"
 96 |      ]
 97 |     }
 98 |    ],
 99 |    "source": [
100 |     "pwd"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "markdown",
105 |    "metadata": {},
106 |    "source": [
107 |     "OK, so we're in the `popgen_course` subfolder within our home folder `/home/stephan`. We can list the contents of that folder:"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "code",
112 |    "execution_count": 5,
113 |    "metadata": {},
114 |    "outputs": [
115 |     {
116 |      "name": "stdout",
117 |      "output_type": "stream",
118 |      "text": [
119 |       "01_bashnb_getting_started.ipynb  pca.AllEurasia.params.txt\n",
120 |       "02_pynb_getting_started.ipynb\t pca.WestEurasia.eval\n",
121 |       "03_bashnb_smartpca.ipynb\t pca.WestEurasia.evec\n",
122 |       "04_pynb_plotting_pca.ipynb\t pca.WestEurasia.params.txt\n",
123 |       "pca.AllEurasia.eval\t\t population_frequencies.txt\n",
124 |       "pca.AllEurasia.evec\t\t README.md\n"
125 |      ]
126 |     }
127 |    ],
128 |    "source": [
129 |     "ls"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "markdown",
134 |    "metadata": {},
135 |    "source": [
136 |     "We can now create a new directory:"
137 |    ]
138 |   },
139 |   {
140 |    "cell_type": "code",
141 |    "execution_count": 6,
142 |    "metadata": {},
143 |    "outputs": [],
144 |    "source": [
145 |     "mkdir testDir"
146 |    ]
147 |   },
148 |   {
149 |    "cell_type": "markdown",
150 |    "metadata": {},
151 |    "source": [
152 |     "and change into that directory:"
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "code",
157 |    "execution_count": 7,
158 |    "metadata": {},
159 |    "outputs": [],
160 |    "source": [
161 |     "cd testDir"
162 |    ]
163 |   },
164 |   {
165 |    "cell_type": "markdown",
166 |    "metadata": {},
167 |    "source": [
168 |     "and confirm that we are now in the new dir:"
169 |    ]
170 |   },
171 |   {
172 |    "cell_type": "code",
173 |    "execution_count": 8,
174 |    "metadata": {},
175 |    "outputs": [
176 |     {
177 |      "name": "stdout",
178 |      "output_type": "stream",
179 |      "text": [
180 |       "/home/stephan/popgen_course/testDir\n"
181 |      ]
182 |     }
183 |    ],
184 |    "source": [
185 |     "pwd"
186 |    ]
187 |   },
188 |   {
189 |    "cell_type": "markdown",
190 |    "metadata": {},
191 |    "source": [
192 |     "OK, let's go back and delete the subfolder again:"
193 |    ]
194 |   },
195 |   {
196 |    "cell_type": "code",
197 |    "execution_count": 9,
198 |    "metadata": {},
199 |    "outputs": [],
200 |    "source": [
201 |     "cd ..\n",
202 |     "rm -r testDir"
203 |    ]
204 |   },
205 |   {
206 |    "cell_type": "markdown",
207 |    "metadata": {},
208 |    "source": [
209 |     "Here is a simple example of how to use ``echo``:"
210 |    ]
211 |   },
212 |   {
213 |    "cell_type": "code",
214 |    "execution_count": 10,
215 |    "metadata": {},
216 |    "outputs": [
217 |     {
218 |      "name": "stdout",
219 |      "output_type": "stream",
220 |      "text": [
221 |       "Hello, how are you?\n"
222 |      ]
223 |     }
224 |    ],
225 |    "source": [
226 |     "echo \"Hello, how are you?\""
227 |    ]
228 |   },
229 |   {
230 |    "cell_type": "markdown",
231 |    "metadata": {},
232 |    "source": [
233 |     "OK, so let's try some more useful things with ``grep``, which can be used to filter large text files by searching for patterns, in this case just the occurrence of the word \"French\":"
234 |    ]
235 |   },
236 |   {
237 |    "cell_type": "code",
238 |    "execution_count": 11,
239 |    "metadata": {},
240 |    "outputs": [
241 |     {
242 |      "name": "stdout",
243 |      "output_type": "stream",
244 |      "text": [
245 |       "           HGDP00511 M     French\n",
246 |       "           HGDP00512 M     French\n",
247 |       "           HGDP00513 F     French\n",
248 |       "           HGDP00514 F     French\n",
249 |       "           HGDP00515 M     French\n",
250 |       "           HGDP00516 F     French\n",
251 |       "           HGDP00517 F     French\n",
252 |       "           HGDP00518 M     French\n",
253 |       "           HGDP00519 M     French\n",
254 |       "           HGDP00522 M     French\n",
255 |       "           HGDP00523 F     French\n",
256 |       "           HGDP00524 F     French\n",
257 |       "           HGDP00525 M     French\n",
258 |       "           HGDP00526 F     French\n",
259 |       "           HGDP00527 F     French\n",
260 |       "           HGDP00528 M     French\n",
261 |       "           HGDP00529 F     French\n",
262 |       "           HGDP00531 F     French\n",
263 |       "           HGDP00533 M     French\n",
264 |       "           HGDP00534 F     French\n",
265 |       "           HGDP00535 F     French\n",
266 |       "           HGDP00536 F     French\n",
267 |       "           HGDP00537 F     French\n",
268 |       "           HGDP00538 M     French\n",
269 |       "           HGDP00539 F     French\n",
270 |       "     SouthFrench3326 M     French\n",
271 |       "     SouthFrench3947 M     French\n",
272 |       "     SouthFrench1323 M     French\n",
273 |       "     SouthFrench3951 M     French\n",
274 |       "     SouthFrench3068 M     French\n",
275 |       "     SouthFrench1112 M     French\n",
276 |       "     SouthFrench4018 M     French\n"
277 |      ]
278 |     }
279 |    ],
280 |    "source": [
281 |     "grep French /data/popgen_course/genotypes_small.ind"
282 |    ]
283 |   },
284 |   {
285 |    "cell_type": "markdown",
286 |    "metadata": {},
287 |    "source": [
288 |     "Alright, so that lists all French individuals. Now let's count them, by simply passing the flag `-c`:"
289 |    ]
290 |   },
291 |   {
292 |    "cell_type": "code",
293 |    "execution_count": 12,
294 |    "metadata": {},
295 |    "outputs": [
296 |     {
297 |      "name": "stdout",
298 |      "output_type": "stream",
299 |      "text": [
300 |       "32\n"
301 |      ]
302 |     }
303 |    ],
304 |    "source": [
305 |     "grep -c French /data/popgen_course/genotypes_small.ind"
306 |    ]
307 |   },
308 |   {
309 |    "cell_type": "markdown",
310 |    "metadata": {},
311 |    "source": [
312 |     "***Note:*** So far we have seen the `pwd`, `ls`, `mkdir`, `cd`, `rm`, `echo` and `grep` commands. If you want to find out more about those, just google them; they are among the most popular and widely used commands/programs in Unix."
313 |    ]
314 |   },
315 |   {
316 |    "cell_type": "markdown",
317 |    "metadata": {},
318 |    "source": [
319 |     "In Python3 notebooks you can plot things: Create a new python3 notebook, and run this boilerplate code in the first cell:\n",
320 |     "\n",
321 |     "    %matplotlib inline\n",
322 |     "    import matplotlib.pyplot as plt\n",
323 |     "\n",
324 |     "Then plot something, opening a second cell:\n",
325 |     "\n",
326 |     "***Exercise:*** Create a simple plot using `plt.plot([1, 2, 3], [5, 2, 6])`\n"
327 |    ]
328 |   },
329 |   {
330 |    "cell_type": "markdown",
331 |    "metadata": {},
332 |    "source": [
333 |     "# Bash Pipes"
334 |    ]
335 |   },
336 |   {
337 |    "cell_type": "markdown",
338 |    "metadata": {},
339 |    "source": [
340 |     "OK. So this first Notebook operates on Bash, which is more or less the lingua franca of Linux operating systems. Everything you do on command lines uses bash. One of the most useful techniques in bash scripting or bash commands are Unix pipes. To illustrate them, consider the following."
341 |    ]
342 |   },
343 |   {
344 |    "cell_type": "markdown",
345 |    "metadata": {},
346 |    "source": [
347 |     "Let's look at the structure of our ``ind`` file:"
348 |    ]
349 |   },
350 |   {
351 |    "cell_type": "code",
352 |    "execution_count": 4,
353 |    "metadata": {},
354 |    "outputs": [
355 |     {
356 |      "name": "stdout",
357 |      "output_type": "stream",
358 |      "text": [
359 |       "             Yuk_009 M    Yukagir\n",
360 |       "             Yuk_025 F    Yukagir\n",
361 |       "             Yuk_022 F    Yukagir\n",
362 |       "             Yuk_020 F    Yukagir\n",
363 |       "               MC_40 M    Chukchi\n",
364 |       "             Yuk_024 F    Yukagir\n",
365 |       "             Yuk_023 F    Yukagir\n",
366 |       "               MC_16 M    Chukchi\n",
367 |       "               MC_15 F    Chukchi\n",
368 |       "               MC_18 M    Chukchi\n"
369 |      ]
370 |     }
371 |    ],
372 |    "source": [
373 |     "head /data/popgen_course/genotypes_small.ind"
374 |    ]
375 |   },
376 |   {
377 |    "cell_type": "markdown",
378 |    "metadata": {},
379 |    "source": [
380 |     "***Note:*** The `head` command just lists the top 10 rows of a file."
381 |    ]
382 |   },
383 |   {
384 |    "cell_type": "markdown",
385 |    "metadata": {},
386 |    "source": [
387 |     "Let's extract the population column:"
388 |    ]
389 |   },
390 |   {
391 |    "cell_type": "code",
392 |    "execution_count": 5,
393 |    "metadata": {},
394 |    "outputs": [
395 |     {
396 |      "name": "stdout",
397 |      "output_type": "stream",
398 |      "text": [
399 |       "Yukagir\n",
400 |       "Yukagir\n",
401 |       "Yukagir\n",
402 |       "Yukagir\n",
403 |       "Chukchi\n",
404 |       "Yukagir\n",
405 |       "Yukagir\n",
406 |       "Chukchi\n",
407 |       "Chukchi\n",
408 |       "Chukchi\n"
409 |      ]
410 |     }
411 |    ],
412 |    "source": [
413 |     "head /data/popgen_course/genotypes_small.ind | awk '{print $3}'"
414 |    ]
415 |   },
416 |   {
417 |    "cell_type": "markdown",
418 |    "metadata": {},
419 |    "source": [
420 |     "***Note:*** The `awk` program is one of the most powerful programs for text-file processing in the Unix-world. It is actually a full-fledged programming language itself. Here we only use it in one of its simplest form. The program `{print $3}` simply says \"For every line of the input file, print out the third field\"."
421 |    ]
422 |   },
423 |   {
424 |    "cell_type": "markdown",
425 |    "metadata": {},
426 |    "source": [
427 |     "***Note:*** The pipe symbol `|` tells Unix to redirect the output of the program to its left into the program to its right as standard input. "
428 |    ]
429 |   },
430 |   {
431 |    "cell_type": "markdown",
432 |    "metadata": {},
433 |    "source": [
434 |     "Let's sort the output (notice we now use ``cat`` instead of ``head`` at the beginning, but use ``head`` at the end):"
435 |    ]
436 |   },
437 |   {
438 |    "cell_type": "code",
439 |    "execution_count": 8,
440 |    "metadata": {},
441 |    "outputs": [
442 |     {
443 |      "name": "stdout",
444 |      "output_type": "stream",
445 |      "text": [
446 |       "Abkhasian\n",
447 |       "Abkhasian\n",
448 |       "Abkhasian\n",
449 |       "Abkhasian\n",
450 |       "Abkhasian\n",
451 |       "Abkhasian\n",
452 |       "Abkhasian\n",
453 |       "Abkhasian\n",
454 |       "Abkhasian\n",
455 |       "Adygei\n",
456 |       "sort: Schreiben fehlgeschlagen: Standardausgabe: Datenübergabe unterbrochen (broken pipe)\n",
457 |       "sort: Schreibfehler\n"
458 |      ]
459 |     }
460 |    ],
461 |    "source": [
462 |     "cat /data/popgen_course/genotypes_small.ind | awk '{print $3}' | sort | head"
463 |    ]
464 |   },
465 |   {
466 |    "cell_type": "markdown",
467 |    "metadata": {},
468 |    "source": [
469 |     "OK, so there are some error messages at the end because ``head`` exits after printing ten lines and closes the pipe while ``sort`` is still writing (a \"broken pipe\"), but that's OK."
470 |    ]
471 |   },
472 |   {
473 |    "cell_type": "markdown",
474 |    "metadata": {},
475 |    "source": [
476 |     "Now let's use ``uniq`` to get rid of population name duplicates:"
477 |    ]
478 |   },
479 |   {
480 |    "cell_type": "code",
481 |    "execution_count": 9,
482 |    "metadata": {},
483 |    "outputs": [
484 |     {
485 |      "name": "stdout",
486 |      "output_type": "stream",
487 |      "text": [
488 |       "Abkhasian\n",
489 |       "Adygei\n",
490 |       "Albanian\n",
491 |       "Aleut\n",
492 |       "Aleut_Tlingit\n",
493 |       "Altaian\n",
494 |       "Ami\n",
495 |       "Armenian\n",
496 |       "Atayal\n",
497 |       "Balkar\n"
498 |      ]
499 |     }
500 |    ],
501 |    "source": [
502 |     "cat /data/popgen_course/genotypes_small.ind | awk '{print $3}' | sort | uniq | head"
503 |    ]
504 |   },
505 |   {
506 |    "cell_type": "markdown",
507 |    "metadata": {},
508 |    "source": [
509 |     "And now let's count:"
510 |    ]
511 |   },
512 |   {
513 |    "cell_type": "code",
514 |    "execution_count": 10,
515 |    "metadata": {},
516 |    "outputs": [
517 |     {
518 |      "name": "stdout",
519 |      "output_type": "stream",
520 |      "text": [
521 |       "116\n"
522 |      ]
523 |     }
524 |    ],
525 |    "source": [
526 |     "cat /data/popgen_course/genotypes_small.ind | awk '{print $3}' | sort | uniq | wc -l"
527 |    ]
528 |   },
529 |   {
530 |    "cell_type": "markdown",
531 |    "metadata": {},
532 |    "source": [
533 |     "OK, so there are 116 populations in the dataset. And how many individuals?"
534 |    ]
535 |   },
536 |   {
537 |    "cell_type": "code",
538 |    "execution_count": 11,
539 |    "metadata": {},
540 |    "outputs": [
541 |     {
542 |      "name": "stdout",
543 |      "output_type": "stream",
544 |      "text": [
545 |       "1340 /data/popgen_course/genotypes_small.ind\n"
546 |      ]
547 |     }
548 |    ],
549 |    "source": [
550 |     "wc -l /data/popgen_course/genotypes_small.ind"
551 |    ]
552 |   },
553 |   {
554 |    "cell_type": "markdown",
555 |    "metadata": {},
556 |    "source": [
557 |     "So 1340 individuals in 116 populations, i.e. a bit more than 10 per population on average. Good to know!"
558 |    ]
559 |   },
560 |   {
561 |    "cell_type": "markdown",
562 |    "metadata": {},
563 |    "source": [
564 |     "***Note:*** we learned some new Unix commands: `awk`, `cat`, `head`, `sort`, `uniq` and `wc`."
565 |    ]
566 |   },
567 |   {
568 |    "cell_type": "markdown",
569 |    "metadata": {},
570 |    "source": [
571 |     "As a final step, let's modify our pipeline to output not just the unique populations, but also the number of individuals per population. Fortunately this is extremely easy, since the flag `-c` to the `uniq` command already does the job:"
572 |    ]
573 |   },
574 |   {
575 |    "cell_type": "code",
576 |    "execution_count": 20,
577 |    "metadata": {},
578 |    "outputs": [
579 |     {
580 |      "name": "stdout",
581 |      "output_type": "stream",
582 |      "text": [
583 |       "      9 Abkhasian\n",
584 |       "     16 Adygei\n",
585 |       "      6 Albanian\n",
586 |       "      7 Aleut\n",
587 |       "      4 Aleut_Tlingit\n",
588 |       "      7 Altaian\n",
589 |       "     10 Ami\n",
590 |       "     10 Armenian\n",
591 |       "      9 Atayal\n",
592 |       "     10 Balkar\n"
593 |      ]
594 |     }
595 |    ],
596 |    "source": [
597 |     "cat /data/popgen_course/genotypes_small.ind | awk '{print $3}' | sort | uniq -c | head"
598 |    ]
599 |   },
600 |   {
601 |    "cell_type": "markdown",
602 |    "metadata": {},
603 |    "source": [
604 |     "Nice. Let's put that list into a file that we can then import for plotting later."
605 |    ]
606 |   },
607 |   {
608 |    "cell_type": "code",
609 |    "execution_count": 21,
610 |    "metadata": {},
611 |    "outputs": [],
612 |    "source": [
613 |     "cat /data/popgen_course/genotypes_small.ind | awk '{print $3}' | sort | uniq -c > population_frequencies.txt"
614 |    ]
615 |   },
616 |   {
617 |    "cell_type": "markdown",
618 |    "metadata": {},
619 |    "source": [
620 |     "OK, we have created a new file called `population_frequencies.txt` in our current directory. We have used the bash redirection symbol `>` for writing outputs from a command or pipeline into a file. The file should now contain the population number data. We can check this by running:"
621 |    ]
622 |   },
623 |   {
624 |    "cell_type": "code",
625 |    "execution_count": 22,
626 |    "metadata": {},
627 |    "outputs": [
628 |     {
629 |      "name": "stdout",
630 |      "output_type": "stream",
631 |      "text": [
632 |       "      9 Abkhasian\n",
633 |       "     16 Adygei\n",
634 |       "      6 Albanian\n",
635 |       "      7 Aleut\n",
636 |       "      4 Aleut_Tlingit\n",
637 |       "      7 Altaian\n",
638 |       "     10 Ami\n",
639 |       "     10 Armenian\n",
640 |       "      9 Atayal\n",
641 |       "     10 Balkar\n"
642 |      ]
643 |     }
644 |    ],
645 |    "source": [
646 |     "head population_frequencies.txt"
647 |    ]
648 |   },
649 |   {
650 |    "cell_type": "markdown",
651 |    "metadata": {},
652 |    "source": [
653 |     "OK, it seems to have worked. If you want to look at the file in a more interactive way, go back to your Jupyter File Browser and click on the file, which you should now see within your working directory. The file should open in a text editor that you can use to scroll around."
654 |    ]
655 |   },
656 |   {
657 |    "cell_type": "markdown",
658 |    "metadata": {},
659 |    "source": [
660 |     "OK, now that we have a file to plot, let's try it out using a new python3 notebook. See the next notebook, called `02_pynb_getting_started` in this series."
661 |    ]
662 |   }
663 |  ],
664 |  "metadata": {
665 |   "kernelspec": {
666 |    "display_name": "Bash",
667 |    "language": "bash",
668 |    "name": "bash"
669 |   },
670 |   "language_info": {
671 |    "codemirror_mode": "shell",
672 |    "file_extension": ".sh",
673 |    "mimetype": "text/x-sh",
674 |    "name": "bash"
675 |   }
676 |  },
677 |  "nbformat": 4,
678 |  "nbformat_minor": 2
679 | }
680 | 


--------------------------------------------------------------------------------
/03_bashnb_smartpca.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Principal Components Analysis (PCA)"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "Principal components analysis (PCA) is one of the most useful techniques to visualise genetic diversity in a dataset. The methodology is not restricted to genetic data, but in general allows breaking down high-dimensional datasets to two or more dimensions for visualisation in a two-dimensional space."
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "## Genotype Data"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "markdown",
 26 |    "metadata": {},
 27 |    "source": [
 28 |     "This lesson is also our first contact with the genotype data used in this and most of the following lessons. The dataset that we will work with contains 1,340 individuals, each represented by 593,124 single nucleotide polymorphisms (SNPs). Those SNPs have exactly two different alleles, and each individual has one of four possible values at each SNP: homozygous reference, heterozygous, homozygous alternative, or missing. Those four values are encoded 2, 1, 0 and 9 respectively. \n",
 29 |     "\n",
 30 |     "The data is laid out as a matrix, with columns indicating individuals, and rows indicating SNPs. The data itself comes in the so-called \"EIGENSTRAT\" format, which is defined in the [Eigensoft package](https://github.com/DReichLab/EIG) and used by many tools in this workshop. In this format, a genotype dataset consists of three files, usually with the following file endings:\n",
 31 |     "\n",
 32 |     "* `*.snp`: The file containing the SNP positions. It consists of six columns: SNP-name, chromosome, genetic position, physical position, reference allele, alternative allele.\n",
 33 |     "* `*.ind`: The file containing the names of the individuals. It consists of three columns: Individual Name, Sex (encoded as M(ale), F(emale), or U(nknown)), and population name.\n",
 34 |     "* `*.geno`: The file containing the genotype matrix, with individuals laid out from left to right, and SNP positions laid out from top to bottom.\n",
 35 |     "  \n",
 36 |     "In the following, we will explore the files using bash in this notebook."
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "markdown",
 41 |    "metadata": {},
 42 |    "source": [
 43 |     "The data that we want to analyse is stored at `/data/popgen_course`. Let's list the contents of that directory:"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": 1,
 49 |    "metadata": {},
 50 |    "outputs": [
 51 |     {
 52 |      "name": "stdout",
 53 |      "output_type": "stream",
 54 |      "text": [
 55 |       "AllEurasia.poplist.txt\tgenotypes_small.ind  WestEurasia.poplist.txt\n",
 56 |       "genotypes_small.geno\tgenotypes_small.snp\n"
 57 |      ]
 58 |     }
 59 |    ],
 60 |    "source": [
 61 |     "ls /data/popgen_course"
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "markdown",
 66 |    "metadata": {},
 67 |    "source": [
 68 |     "Let's explore those files a bit. Here are the first 20 individuals:"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "code",
 73 |    "execution_count": 2,
 74 |    "metadata": {},
 75 |    "outputs": [
 76 |     {
 77 |      "name": "stdout",
 78 |      "output_type": "stream",
 79 |      "text": [
 80 |       "             Yuk_009 M    Yukagir\n",
 81 |       "             Yuk_025 F    Yukagir\n",
 82 |       "             Yuk_022 F    Yukagir\n",
 83 |       "             Yuk_020 F    Yukagir\n",
 84 |       "               MC_40 M    Chukchi\n",
 85 |       "             Yuk_024 F    Yukagir\n",
 86 |       "             Yuk_023 F    Yukagir\n",
 87 |       "               MC_16 M    Chukchi\n",
 88 |       "               MC_15 F    Chukchi\n",
 89 |       "               MC_18 M    Chukchi\n",
 90 |       "             Yuk_004 M    Yukagir\n",
 91 |       "               MC_08 F    Chukchi\n",
 92 |       "             Nov_005 M   Nganasan\n",
 93 |       "               MC_25 F    Chukchi\n",
 94 |       "             Yuk_019 F    Yukagir\n",
 95 |       "             Yuk_011 M    Yukagir\n",
 96 |       "             Sesk_47 M   Chukchi1\n",
 97 |       "               MC_17 M    Chukchi\n",
 98 |       "             Yuk_021 M    Yukagir\n",
 99 |       "               MC_06 F    Chukchi\n"
100 |      ]
101 |     }
102 |    ],
103 |    "source": [
104 |     "head -20 /data/popgen_course/genotypes_small.ind"
105 |    ]
106 |   },
107 |   {
108 |    "cell_type": "markdown",
109 |    "metadata": {},
110 |    "source": [
111 |     "And here the first 20 SNP rows:"
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "code",
116 |    "execution_count": 3,
117 |    "metadata": {},
118 |    "outputs": [
119 |     {
120 |      "name": "stdout",
121 |      "output_type": "stream",
122 |      "text": [
123 |       "            1_752566     1        0.020130          752566 G A\n",
124 |       "            1_842013     1        0.022518          842013 T G\n",
125 |       "            1_891021     1        0.024116          891021 G A\n",
126 |       "            1_903426     1        0.024457          903426 C T\n",
127 |       "            1_949654     1        0.025727          949654 A G\n",
128 |       "           1_1018704     1        0.026288         1018704 A G\n",
129 |       "           1_1045331     1        0.026665         1045331 G A\n",
130 |       "           1_1048955     1        0.026674         1048955 A G\n",
131 |       "           1_1061166     1        0.026711         1061166 T C\n",
132 |       "           1_1108637     1        0.028311         1108637 G A\n",
133 |       "           1_1120431     1        0.028916         1120431 G A\n",
134 |       "           1_1156131     1        0.029335         1156131 T C\n",
135 |       "           1_1157547     1        0.029356         1157547 T C\n",
136 |       "           1_1158277     1        0.029367         1158277 G A\n",
137 |       "           1_1161780     1        0.029391         1161780 C T\n",
138 |       "           1_1170587     1        0.029450         1170587 C T\n",
139 |       "           1_1205155     1        0.029735         1205155 A C\n",
140 |       "           1_1211292     1        0.029785         1211292 C T\n",
141 |       "           1_1235792     1        0.030045         1235792 C T\n",
142 |       "           1_1254255     1        0.030111         1254255 G A\n"
143 |      ]
144 |     }
145 |    ],
146 |    "source": [
147 |     "head -20 /data/popgen_course/genotypes_small.snp"
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "markdown",
152 |    "metadata": {},
153 |    "source": [
154 |     "And here are the first 20 genotypes of the first 50 individuals:"
155 |    ]
156 |   },
157 |   {
158 |    "cell_type": "code",
159 |    "execution_count": 3,
160 |    "metadata": {},
161 |    "outputs": [
162 |     {
163 |      "name": "stdout",
164 |      "output_type": "stream",
165 |      "text": [
166 |       "01011012111022101020212001000102000000110010002000\n",
167 |       "20121210122100111221001112022012221211022221211210\n",
168 |       "11001120011100210010011110000112000001111000011100\n",
169 |       "00001122102221212211211002022212221221121122112021\n",
170 |       "00000000000000000000000000001000000000000000001000\n",
171 |       "10121002211022011011211101201100000100120020102001\n",
172 |       "22222222222222222222222222222222222222222222222222\n",
173 |       "22112220022120221020012122222122122222101222121212\n",
174 |       "22112220022120221020012122020122122122101222121211\n",
175 |       "22222222221022222022222222222222222222222222112222\n",
176 |       "22122222121222222222222222222212222222222222202211\n",
177 |       "11011000010000010010000002220100212000012021101011\n",
178 |       "12211212212222112212222221212212222122222222222222\n",
179 |       "12211212212222112212222221212212222122222222222222\n",
180 |       "12211212212222112212222221212212222122222222222222\n",
181 |       "22222222222222222222222222222222222222222222222222\n",
182 |       "22222222222222222222222222222222222222222222222222\n",
183 |       "10111111021001110011002001222210222112112220212122\n",
184 |       "22222222222222222222222222222222222222222222222222\n",
185 |       "21221212121022212022222222222222211222122221922222\n"
186 |      ]
187 |     }
188 |    ],
189 |    "source": [
190 |     "head -20 /data/popgen_course/genotypes_small.geno | cut -c1-50"
191 |    ]
192 |   },
193 |   {
194 |    "cell_type": "markdown",
195 |    "metadata": {},
196 |    "source": [
197 |     "Counting how many individuals and SNPs there are:"
198 |    ]
199 |   },
200 |   {
201 |    "cell_type": "code",
202 |    "execution_count": 4,
203 |    "metadata": {},
204 |    "outputs": [
205 |     {
206 |      "name": "stdout",
207 |      "output_type": "stream",
208 |      "text": [
209 |       "1340 /data/popgen_course/genotypes_small.ind\n",
210 |       "593124 /data/popgen_course/genotypes_small.snp\n"
211 |      ]
212 |     }
213 |    ],
214 |    "source": [
215 |     "wc -l /data/popgen_course/genotypes_small.ind\n",
216 |     "wc -l /data/popgen_course/genotypes_small.snp"
217 |    ]
218 |   },
219 |   {
220 |    "cell_type": "markdown",
221 |    "metadata": {},
222 |    "source": [
223 |     "And now we check that the first row of the `*.geno` file indeed contains the same number of columns:"
224 |    ]
225 |   },
226 |   {
227 |    "cell_type": "code",
228 |    "execution_count": 6,
229 |    "metadata": {},
230 |    "outputs": [
231 |     {
232 |      "name": "stdout",
233 |      "output_type": "stream",
234 |      "text": [
235 |       "1341\n"
236 |      ]
237 |     }
238 |    ],
239 |    "source": [
240 |     "head -1 /data/popgen_course/genotypes_small.geno | wc -c"
241 |    ]
242 |   },
243 |   {
244 |    "cell_type": "markdown",
245 |    "metadata": {},
246 |    "source": [
247 |     "which is one more, including the newline character at the end of the line. Now counting the number of rows in the `*.geno`-file (this takes a few seconds, as the file is several hundred MB large):"
248 |    ]
249 |   },
250 |   {
251 |    "cell_type": "code",
252 |    "execution_count": 7,
253 |    "metadata": {},
254 |    "outputs": [
255 |     {
256 |      "name": "stdout",
257 |      "output_type": "stream",
258 |      "text": [
259 |       "593124 /data/popgen_course/genotypes_small.geno\n"
260 |      ]
261 |     }
262 |    ],
263 |    "source": [
264 |     "wc -l /data/popgen_course/genotypes_small.geno"
265 |    ]
266 |   },
267 |   {
268 |    "cell_type": "markdown",
269 |    "metadata": {},
270 |    "source": [
271 |     "Great, the number of rows and columns agrees with the numbers indicated in the `*.ind` and `*.snp` file!\n",
272 |     "Now we're counting how many different populations there are. Let's first see the first 20 populations in the sorted list, alongside the number of individuals in each group:"
273 |    ]
274 |   },
275 |   {
276 |    "cell_type": "code",
277 |    "execution_count": 5,
278 |    "metadata": {},
279 |    "outputs": [
280 |     {
281 |      "name": "stdout",
282 |      "output_type": "stream",
283 |      "text": [
284 |       "      9 Abkhasian\n",
285 |       "     16 Adygei\n",
286 |       "      6 Albanian\n",
287 |       "      7 Aleut\n",
288 |       "      4 Aleut_Tlingit\n",
289 |       "      7 Altaian\n",
290 |       "     10 Ami\n",
291 |       "     10 Armenian\n",
292 |       "      9 Atayal\n",
293 |       "     10 Balkar\n",
294 |       "     29 Basque\n",
295 |       "     25 BedouinA\n",
296 |       "     19 BedouinB\n",
297 |       "     10 Belarusian\n",
298 |       "      6 BolshoyOleniOstrov\n",
299 |       "      9 Borneo\n",
300 |       "     10 Bulgarian\n",
301 |       "      8 Cambodian\n",
302 |       "      2 Canary_Islander\n",
303 |       "      2 ChalmnyVarre\n"
304 |      ]
305 |     }
306 |    ],
307 |    "source": [
308 |     "awk '{print $3}' /data/popgen_course/genotypes_small.ind | sort | uniq -c | head -20"
309 |    ]
310 |   },
311 |   {
312 |    "cell_type": "markdown",
313 |    "metadata": {},
314 |    "source": [
315 |     "## How PCA works\n",
316 |     "\n",
317 |     "To understand how PCA works, consider a single individual and its representation by its 593,124 markers. Formally, each individual is a point in a 593,124-dimensional space, where each dimension\n",
318 |     "can take only the three possible genotypes indicated above, or have missing data. To visualise this high-dimensional dataset, we would like to project it down to two dimensions. But as there are many ways to project the shadow of a three-dimensional object on a two dimensional plane, there are many (and even more) ways to project a 593,124-dimensional cloud of points to two dimensions. What PCA does is figure out the \"best\" way to do this projection in order to visualise the major components of variance in the data.\n"
319 |    ]
320 |   },
321 |   {
322 |    "cell_type": "markdown",
323 |    "metadata": {},
324 |    "source": [
325 |     "## Parameter files\n",
326 |     "For actually running the analysis, we use a software called `smartPCA` from the [Eigensoft package](https://github.com/DReichLab/EIG). As many other tools from this and related packages, `smartPCA` reads in a parameter file which specifies its input and output files and options. In our case, we want the parameter file to have the following content:\n",
327 |     "\n",
328 |     "    genotypename: /data/popgen_course/genotypes_small.geno\n",
329 |     "    snpname: /data/popgen_course/genotypes_small.snp\n",
330 |     "    indivname: /data/popgen_course/genotypes_small.ind\n",
331 |     "    evecoutname: pca.WestEurasia.evec\n",
332 |     "    evaloutname: pca.WestEurasia.eval\n",
333 |     "    poplistname: /data/popgen_course/WestEurasia.poplist.txt\n",
334 |     "    lsqproject: YES\n",
335 |     "    numoutevec: 4\n",
336 |     "    numthreads: 1"
337 |    ]
338 |   },
339 |   {
340 |    "cell_type": "markdown",
341 |    "metadata": {},
342 |    "source": [
343 |     "Here, the first three parameters specify the input genotype files. The next two rows specify two output file names, typically with ending `*.evec` and `*.eval`. The parameter line beginning with `poplistname` contains a file with a list of populations used for calculating the principal components (see below). The option `lsqproject` is important for applications including ancient DNA with lots of missing data, which I will not elaborate on. For the purpose of this workshop, you should use `lsqproject: YES`. The next option `numoutevec` specifies the number of principal components that we compute, the last option `numthreads` the number of CPUs to use for this run. We use just one since we're working together on the same computer, so cannot afford everyone running on lots of CPUs."
344 |    ]
345 |   },
346 |   {
347 |    "cell_type": "markdown",
348 |    "metadata": {},
349 |    "source": [
350 |     "## Population lists vs. Projection\n",
351 |     "\n",
352 |     "The parameter named `poplistname` is a very crucial one. It specifies the populations whose individuals are used to calculate the principal components. Why not just all of them, you ask? For three reasons: First, there are simply too many of them and we don't want to use all of them, since the computation would take too long. More importantly, however, we generally try to avoid using ancient samples to compute principal components, to avoid specific ancient-DNA related artefacts affecting the computation. Finally, the list of populations to use for PCA should be informed by your question. If you're investigating African population structure, it makes no sense to put Asian or European individuals in your population list, since then the main axes of genetic differentiation would not be inside of Africa, but between Africans and Non-Africans.\n",
353 |     "\n",
354 |     "So what happens to individuals that are not in populations listed in the population list? Well, fortunately, they are not just ignored, but \"projected\". This means that after the principal components have been computed, *all* individuals (not just the ones in the list) are projected onto these principal components. That way, we can visualise ancient populations in the context of modern genetic variation. While that may sound a bit problematic at first (some variation in ancient populations is not represented well by modern populations), it turns out to be nevertheless one of the most useful tools for this purpose. The advantage of preventing ancient-DNA artefacts and batch effects from affecting the visualisation outweighs the disadvantage of missing some private genetic variation components in the ancient populations themselves. Of course, that argument breaks down once the analysed populations become too ancient and detached from modern genetic variation. But for our purposes it will work just fine.\n",
355 |     "\n",
356 |     "For this workshop, I prepared two population lists:\n",
357 |     "\n",
358 |     "    /data/popgen_course/WestEurasia.poplist.txt\n",
359 |     "    /data/popgen_course/AllEurasia.poplist.txt\n",
360 |     "\n",
361 |     "As you can tell from the names of the files, they specify two sets of modern populations representing West Eurasia or all of Europe and Asia, respectively.\n",
362 |     "\n",
363 |     "I recommend to look through both of the population lists and google some population names that you don't recognise to get a feeling for the ethnic groups represented here."
364 |    ]
365 |   },
366 |   {
367 |    "cell_type": "markdown",
368 |    "metadata": {},
369 |    "source": [
370 |     "## Running `smartPCA`"
371 |    ]
372 |   },
373 |   {
374 |    "cell_type": "markdown",
375 |    "metadata": {},
376 |    "source": [
377 |     "Now go ahead and open a new text file using your Jupyter Browser, you can name it anything you like. For the sake of a concrete name, let's call it `pca.WestEurasia.params.txt`. Text files in Jupyter are opened in a text editor, so you can then simply copy-paste the above lines into the new file."
378 |    ]
379 |   },
380 |   {
381 |    "cell_type": "markdown",
382 |    "metadata": {},
383 |    "source": [
384 |     "Let's see whether it worked, by printing out the contents of that file into your notebook:"
385 |    ]
386 |   },
387 |   {
388 |    "cell_type": "code",
389 |    "execution_count": 8,
390 |    "metadata": {},
391 |    "outputs": [
392 |     {
393 |      "name": "stdout",
394 |      "output_type": "stream",
395 |      "text": [
396 |       "genotypename: /data/popgen_course/genotypes_small.geno\n",
397 |       "snpname: /data/popgen_course/genotypes_small.snp\n",
398 |       "indivname: /data/popgen_course/genotypes_small.ind\n",
399 |       "evecoutname: pca.WestEurasia.evec\n",
400 |       "evaloutname: pca.WestEurasia.eval\n",
401 |       "poplistname: /data/popgen_course/WestEurasia.poplist.txt\n",
402 |       "lsqproject: YES\n",
403 |       "numoutevec: 4\n",
404 |       "numthreads: 1\n"
405 |      ]
406 |     }
407 |    ],
408 |    "source": [
409 |     "cat pca.WestEurasia.params.txt"
410 |    ]
411 |   },
412 |   {
413 |    "cell_type": "markdown",
414 |    "metadata": {},
415 |    "source": [
416 |     "Great, so that's our parameter file for running `smartPCA`.\n",
417 |     "\n",
418 |     "***Note:*** that we specified two output files in our parameter file, here called `pca.WestEurasia.evec` and `pca.WestEurasia.eval`. You can actually put any names you want in there. But beware of relative vs. absolute paths. File names starting with `/` are considered \"absolute\", that is, taken to go from the root of the file system. In contrast, filenames not starting with `/` are considered \"relative\" to the current working directory. If you forgot which directory you're in, run `pwd`.\n",
419 |     "\n",
420 |     "***Note:*** The option `poplistname` is a crucial one. Here you need to specify which populations are used to compute the eigenvectors of the principal components analysis. In our case, I have prepared two population list files: `/data/popgen_course/WestEurasia.poplist.txt` and `/data/popgen_course/AllEurasia.poplist.txt`. Pick one of the two to carry on."
421 |    ]
422 |   },
423 |   {
424 |    "cell_type": "markdown",
425 |    "metadata": {},
426 |    "source": [
427 |     "Good, now we can run `smartPCA`. To do that, it's more convenient to use the terminal than a Notebook. So open a terminal and run\n",
428 |     "\n",
429 |     "    smartpca -p pca.WestEurasia.params.txt"
430 |    ]
431 |   },
432 |   {
433 |    "cell_type": "markdown",
434 |    "metadata": {},
435 |    "source": [
436 |     "This will typically run for about 30 minutes and output lots of logging output to the screen."
437 |    ]
438 |   },
439 |   {
440 |    "cell_type": "markdown",
441 |    "metadata": {},
442 |    "source": [
443 |     "In a similar manner we can prepare a parameter file for the AllEurasia population list. This is how it should look:"
444 |    ]
445 |   },
446 |   {
447 |    "cell_type": "code",
448 |    "execution_count": 11,
449 |    "metadata": {},
450 |    "outputs": [
451 |     {
452 |      "name": "stdout",
453 |      "output_type": "stream",
454 |      "text": [
455 |       "genotypename: /data/popgen_course/genotypes_small.geno\n",
456 |       "snpname: /data/popgen_course/genotypes_small.snp\n",
457 |       "indivname: /data/popgen_course/genotypes_small.ind\n",
458 |       "evecoutname: pca.AllEurasia.evec\n",
459 |       "evaloutname: pca.AllEurasia.eval\n",
460 |       "poplistname: /data/popgen_course/AllEurasia.poplist.txt\n",
461 |       "lsqproject: YES\n",
462 |       "numoutevec: 4\n",
463 |       "numthreads: 1\n"
464 |      ]
465 |     }
466 |    ],
467 |    "source": [
468 |     "cat pca.AllEurasia.params.txt"
469 |    ]
470 |   },
471 |   {
472 |    "cell_type": "markdown",
473 |    "metadata": {},
474 |    "source": [
475 |     "And similar to the command above, we can run pca on the AllEurasia population list via:\n",
476 |     "\n",
477 |     "    smartpca -p pca.AllEurasia.params.txt"
478 |    ]
479 |   },
480 |   {
481 |    "cell_type": "markdown",
482 |    "metadata": {},
483 |    "source": [
484 |     "which will run slightly longer than the first one because there are more populations "
485 |    ]
486 |   }
487 |  ],
488 |  "metadata": {
489 |   "kernelspec": {
490 |    "display_name": "Bash",
491 |    "language": "bash",
492 |    "name": "bash"
493 |   },
494 |   "language_info": {
495 |    "codemirror_mode": "shell",
496 |    "file_extension": ".sh",
497 |    "mimetype": "text/x-sh",
498 |    "name": "bash"
499 |   }
500 |  },
501 |  "nbformat": 4,
502 |  "nbformat_minor": 2
503 | }
504 | 


--------------------------------------------------------------------------------
/05_Rmd_fstatistics.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "F Statistics"
  3 | output: html_document
  4 | editor_options: 
  5 |   chunk_output_type: console
  6 | ---
  7 | 
  8 | ```{r, echo=FALSE}
  9 | knitr::opts_chunk$set(message = FALSE)  # default for all later chunks: keep messages out of the knitted output
 10 | ```
 11 | 
 12 | ```{r}
 13 | library(magrittr)
 14 | ```
 15 | 
 16 | ## F3 Statistics
 17 | 
 18 | F3 statistics are a useful analytical tool to understand population relationships. F3 statistics, just like F4 and F2 statistics, measure allele frequency correlations between populations and were introduced by Nick Patterson and colleagues in [Patterson 2012](http://www.genetics.org/content/early/2012/09/06/genetics.112.145037).
 19 | 
 20 | F3 statistics are used for two purposes: i) as a test whether a target population (C) is admixed between two source populations (A and B), and ii) to measure shared drift between two test populations (A and B) from an outgroup (C).
 21 | 
 22 | F3 statistics are in both cases defined as the product of allele frequency differences between population C to A and B, respectively:
 23 | 
 24 | $$F3(A,B;C)=\langle(c-a)(c-b)\rangle$$
 25 | 
 26 | Here, $\langle\cdot\rangle$ denotes the average over all genotyped sites, and a, b and c
 27 | denote the allele frequency for a given site in the three populations A, B and C.
 28 | 
 29 | ## Admixture F3 Statistics
 30 | 
 31 | It can be shown that if this statistic is negative, it provides unambiguous proof that population C is admixed between populations A and B, as in the following phylogeny (taken from Figure 1 of [Patterson 2012](http://www.genetics.org/content/early/2012/09/06/genetics.112.145037)):
 32 | 
 33 | <img src="img/f3_phylogeny.png" alt="F3-phylogeny" style="width: 300px;"/>
 34 | 
 35 | Intuitively, an F3 statistics becomes negative if the allele frequency of the target population C is on average intermediate between the allele frequencies of A and B. Consider as an extreme example a genomic site where a=0, b=1 and c=0.5. Then we have (c−a)(c−b)=−0.25, which is negative. So if the entire statistics is negative, it suggests that in many positions, the allele frequency c is indeed intermediate, suggesting admixture between the two sources.
 36 | 
 37 | **Note:** If an F3 statistic is *not* negative, it does *not* prove that there is no admixture!
 38 | 
 39 | We will use this statistics to test if Finnish are admixed between East and West, using different Eastern and Western sources. In the West, we use French, Icelandic, Lithuanian and Norwegian as source, and in the East we use Nganasan and one of the ancient individuals analysed in this workshop, *Bolshoy Oleni Ostrov*, 3500 year old individuals from the Northern Russian Kola-peninsula.
 40 | 
 41 | We use the software `qp3Pop` from [AdmixTools](https://github.com/DReichLab/AdmixTools), which similar to `smartpca` takes a parameter file:
 42 | 
 43 | ```
 44 | genotypename:   input genotype file (in eigenstrat format)
 45 | snpname:        input snp file      (in eigenstrat format)
 46 | indivname:      input indiv file    (in eigenstrat format)
 47 | popfilename:    a file containing rows with three populations on each line A, B and C.
 48 | inbreed: YES
 49 | ```
 50 | 
 51 | Here, the last option is necessary if we are analysing pseudo-diploid ancient data (which is the case here).
 52 | 
 53 | To prepare the `popfilename`, create a new text file with the following content:
 54 | 
 55 | ```
 56 | Nganasan French Finnish
 57 | Nganasan Icelandic Finnish
 58 | Nganasan Lithuanian Finnish
 59 | Nganasan Norwegian Finnish
 60 | BolshoyOleniOstrov French Finnish
 61 | BolshoyOleniOstrov Icelandic Finnish
 62 | BolshoyOleniOstrov Lithuanian Finnish
 63 | BolshoyOleniOstrov Norwegian Finnish
 64 | ```
 65 | 
 66 | **Exercise:** Prepare the parameter file with the input data as in the PCA session (see Principal Components Analysis (PCA)) and then run `qp3Pop -p PARAMETER_FILE`, where `PARAMETER_FILE` should be replaced by your parameter file name. As genotype data, use the files called `/data/popgen_course/HumanOrigins_FennoScandian_small.*`.
 67 | 
 68 | The results are in the output that you can view in the Notebook. The crucial bit should look like this:
 69 | 
 70 | ```
 71 | Source 1	Source 2	Target	f_3	std. err	Z	SNPs
 72 | result:	Nganasan	French	Finnish	-0.004539	0.000510	-8.894	442567
 73 | result:	Nganasan	Icelandic	Finnish	-0.005297	0.000563	-9.404	427954
 74 | result:	Nganasan	Lithuanian	Finnish	-0.005062	0.000590	-8.574	426231
 75 | result:	Nganasan	Norwegian	Finnish	-0.004744	0.000569	-8.332	428161
 76 | result:	BolshoyOleniOstrov	French	Finnish	-0.002814	0.000444	-6.341	402958
 77 | result:	BolshoyOleniOstrov	Icelandic	Finnish	-0.002590	0.000486	-5.323	386418
 78 | result:	BolshoyOleniOstrov	Lithuanian	Finnish	-0.001523	0.000536	-2.840	384134
 79 | result:	BolshoyOleniOstrov	Norwegian	Finnish	-0.001553	0.000502	-3.092	386203
 80 | ```
 81 | 
 82 | This output shows as first three columns the three populations A, B (sources) and C (target). Then the f3 statistics, which is negative in all cases tested here, a standard error, a Z score and the number of SNPs involved in the statistics.
 83 | 
 84 | The Z score is key: It gives the deviation of the f3 statistic from zero in units of the standard error. As a general rule, a Z score of -3 or below suggests a significant rejection of the Null hypothesis that the statistic is not negative. In this case, all of the statistics are negative, and all but one (BolshoyOleniOstrov/Lithuanian, with Z = -2.840, falls just short of the threshold) are significantly so, proving that Finnish have ancestral admixture of East and West Eurasian ancestry. Note that the statistic does not tell us when this admixture happened!
 85 | 
 86 | ## F4 Statistics
 87 | 
 88 | A different way to test for admixture is by “F4 statistics” (or “D statistics” which is very similar), also introduced in [Patterson 2012](http://www.genetics.org/content/early/2012/09/06/genetics.112.145037).
 89 | 
 90 | F4 statistics are also defined in terms of correlations of allele frequency differences, similarly to F3 statistics (see above), but involving four different populations, not just three. Specifically we define
 91 | 
 92 | $$F4(A,B;C,D)=\langle(a-b)(c-d)\rangle.$$
 93 | 
 94 | To understand the statistics, consider the following tree:
 95 | 
 96 | <img src="img/f4_phylogeny.png" alt="F4-phylogeny" style="width: 300px;"/>
 97 | 
 98 | In this tree, without any additional admixture, the allele frequency difference between A and B should be completely independent from the allele frequency difference between C and D. In that case, F4(A, B; C, D) should be zero, or at least not statistically different from zero. However, if there was gene flow from C or D into A or B, the statistic should be different from zero. Specifically, if the statistic is significantly negative, it implies gene flow between either C and B, or D and A. If it is significantly positive, it implies gene flow between A and C, or B and D.
 99 | 
100 | The way this statistic is often used, is to put a divergent outgroup as population A, for which we know for sure that there was no admixture into either C or D. With this setup, we can then test for gene flow between B and D (if the statistic is positive), or B and C (if it is negative).
101 | 
102 | Here, we can use this statistic to test for East Asian admixture in Finns, similarly to the test using Admixture F3 statistics above. We will use the `qpDstat` program from [AdmixTools](https://github.com/DReichLab/AdmixTools) for that. We need to again prepare a population list file, this time with four populations (A, B, C, D). I suggest you open a new file and fill it with:
103 | 
104 | ```
105 | Mbuti Nganasan French Finnish
106 | Mbuti Nganasan Icelandic Finnish
107 | Mbuti Nganasan Lithuanian Finnish
108 | Mbuti Nganasan Norwegian Finnish
109 | Mbuti BolshoyOleniOstrov French Finnish
110 | Mbuti BolshoyOleniOstrov Icelandic Finnish
111 | Mbuti BolshoyOleniOstrov Lithuanian Finnish
112 | Mbuti BolshoyOleniOstrov Norwegian Finnish
113 | ```
114 | 
115 | You can then use this file again in a parameter file, similar to the one prepared for `qp3Pop` above:
116 | 
117 | ```
118 | genotypename:   input genotype file (in eigenstrat format)
119 | snpname:        input snp file      (in eigenstrat format)
120 | indivname:      input indiv file    (in eigenstrat format)
121 | popfilename:    a file containing rows with four populations on each line A, B, C and D.
122 | f4mode: YES
123 | ```
124 | 
125 | Note that you cannot give the “inbreed” option here.
126 | 
127 | **Exercise:** Prepare the parameter file as suggested above and then run `qpDstat -p PARAMETER_FILE`, where `PARAMETER_FILE` should be replaced by your parameter file name. This will take 5-6 minutes.
128 | 
129 | The results should be (skipping some header lines):
130 | 
131 | ```
132 | result:	Mbuti	Nganasan     French    Finnish      0.002363     19.016   29254  27852 593124
133 | result:	Mbuti	Nganasan  Icelandic    Finnish      0.001721     11.926   28915  27894 593124
134 | result:	Mbuti	Nganasan Lithuanian    Finnish      0.001368      9.664   28745  27933 593124
135 | result:	Mbuti	Nganasan  Norwegian    Finnish      0.001685     11.663   28933  27934 593124
136 | result:	Mbuti	BolshoyOleniOstrov     French    Finnish      0.001962     16.737   27249  26175 547486
137 | result:	Mbuti	BolshoyOleniOstrov  Icelandic    Finnish      0.001084      7.776   26876  26282 547486
138 | result:	Mbuti	BolshoyOleniOstrov Lithuanian    Finnish      0.000554      3.942   26683  26380 547486
139 | result:	Mbuti	BolshoyOleniOstrov  Norwegian    Finnish      0.000952      6.707   26873  26351 547486
140 | ```
141 | 
142 | Here, the key columns are columns 2, 3, 4 and 5, denoting A, B, C and D, and column 6 and 7, which denote the F4 statistic and the Z score, measuring significance in difference from zero.
143 | 
144 | As you can see, in all cases, the Z score is positive and larger than 3, indicating a significant deviation from zero, and implying gene flow between Nganasan and Finnish, and BolshoyOleniOstrov and Finnish, when compared to French, Icelandic, Lithuanian or Norwegian.
145 | 
146 | ## Outgroup-F3-Statistics
147 | 
148 | Outgroup F3 statistics are a special way of using F3 statistics. The definition is the same as for Admixture F3 statistics, but instead of a target C and two source populations A and B, one now gives an outgroup C and two test populations A and B.
149 | 
150 | To get an intuition for this statistics, consider the following tree:
151 | 
152 | <img src="img/outgroupf3_phylogeny.png" alt="Outgroup-F3-phylogeny" style="width: 300px;"/>
153 | 
154 | In this scenario, the statistic F3(A, B; C) measures the branch length from C to the common ancestor of A and B, coloured red. So this statistic is simply a measure of how closely two populations A and B are related with each other, as measured from a distant outgroup. It is thus a similarity measure: The higher the statistic, the more genetically similar A and B are to one another.
155 | 
156 | We can use this statistic to measure for example the genetic affinity to East Asia, by performing the statistic F3(Han, X; Mbuti), where Mbuti is a distant African population and acts as outgroup here, Han denotes Han Chinese, and X denotes various European populations that we want to test.
157 | 
158 | You need to start, again, by preparing a list of population triples to be measured. I suggest the following list:
159 | 
160 | ```
161 | Han Chuvash Mbuti
162 | Han Albanian Mbuti
163 | Han Armenian Mbuti
164 | Han Bulgarian Mbuti
165 | Han Czech Mbuti
166 | Han Druze Mbuti
167 | Han English Mbuti
168 | Han Estonian Mbuti
169 | Han Finnish Mbuti
170 | Han French Mbuti
171 | Han Georgian Mbuti
172 | Han Greek Mbuti
173 | Han Hungarian Mbuti
174 | Han Icelandic Mbuti
175 | Han Italian_North Mbuti
176 | Han Italian_South Mbuti
177 | Han Lithuanian Mbuti
178 | Han Maltese Mbuti
179 | Han Mordovian Mbuti
180 | Han Norwegian Mbuti
181 | Han Orcadian Mbuti
182 | Han Russian Mbuti
183 | Han Sardinian Mbuti
184 | Han Scottish Mbuti
185 | Han Sicilian Mbuti
186 | Han Spanish_North Mbuti
187 | Han Spanish Mbuti
188 | Han Ukrainian Mbuti
189 | Han Levanluhta Mbuti
190 | Han BolshoyOleniOstrov Mbuti
191 | Han ChalmnyVarre Mbuti
192 | Han Saami.DG Mbuti
193 | ```	
194 | 
195 | which cycles through many populations from Europe, including the ancient individuals from Chalmny Varre, Bolshoy Oleni Ostrov and Levänluhta.
196 | 
197 | **Exercise:** Copy this list into a file, and prepare a parameter file for running `qp3Pop`, similar to the parameter file for admixture F3 statistics above, and run `qp3Pop` with that parameter file as above. Note that here you don't need the line beginning with `inbreed`. This will take up to 10 minutes.
198 | 
199 | You should find this (skipping header lines from the output):
200 | 
201 | ```
202 |            Source 1             Source 2   Target        f_3       std. err           Z    SNPs
203 | result:        Han              Chuvash    Mbuti   0.233652       0.002072     112.782  502678
204 | result:        Han             Albanian    Mbuti   0.215629       0.002029     106.291  501734
205 | result:        Han             Armenian    Mbuti   0.213724       0.001963     108.882  504370
206 | result:        Han            Bulgarian    Mbuti   0.216193       0.001979     109.266  504310
207 | result:        Han                Czech    Mbuti   0.218060       0.002002     108.939  504089
208 | result:        Han                Druze    Mbuti   0.209551       0.001919     109.205  510853
209 | result:        Han              English    Mbuti   0.216959       0.001973     109.954  504161
210 | result:        Han             Estonian    Mbuti   0.220730       0.002019     109.332  503503
211 | result:        Han              Finnish    Mbuti   0.223447       0.002044     109.345  502217
212 | result:        Han               French    Mbuti   0.216623       0.001969     110.012  509613
213 | result:        Han             Georgian    Mbuti   0.214295       0.001935     110.721  503598
214 | result:        Han                Greek    Mbuti   0.215203       0.001984     108.465  507475
215 | result:        Han            Hungarian    Mbuti   0.217894       0.001999     109.004  507409
216 | result:        Han            Icelandic    Mbuti   0.218683       0.002015     108.553  504655
217 | result:        Han        Italian_North    Mbuti   0.215332       0.001978     108.854  507589
218 | result:        Han        Italian_South    Mbuti   0.211787       0.002271      93.265  492400
219 | result:        Han           Lithuanian    Mbuti   0.219615       0.002032     108.098  503681
220 | result:        Han              Maltese    Mbuti   0.210359       0.001956     107.542  503985
221 | result:        Han            Mordovian    Mbuti   0.223469       0.002008     111.296  503441
222 | result:        Han            Norwegian    Mbuti   0.218873       0.002023     108.197  504621
223 | result:        Han             Orcadian    Mbuti   0.217773       0.002014     108.115  504993
224 | result:        Han              Russian    Mbuti   0.223993       0.001995     112.274  506525
225 | result:        Han            Sardinian    Mbuti   0.213230       0.001980     107.711  508413
226 | result:        Han             Scottish    Mbuti   0.218489       0.002039     107.145  499784
227 | result:        Han             Sicilian    Mbuti   0.212272       0.001975     107.486  505477
228 | result:        Han        Spanish_North    Mbuti   0.215885       0.002029     106.383  500853
229 | result:        Han              Spanish    Mbuti   0.213869       0.001975     108.297  513648
230 | result:        Han            Ukrainian    Mbuti   0.218716       0.002007     108.950  503981
231 | result:        Han           Levanluhta    Mbuti   0.236252       0.002383      99.123  263049
232 | result:        Han   BolshoyOleniOstrov    Mbuti   0.247814       0.002177     113.849  457102
233 | result:        Han         ChalmnyVarre    Mbuti   0.233499       0.002304     101.345  366220
234 | result:        Han             Saami.DG    Mbuti   0.236198       0.002274     103.852  489038
235 | ```
236 | 
237 | Now it’s time to plot these results using R. Copy the results (all lines from the output beginning with “result:”) into a text file named "f3_outgroup_stats_Han.txt", and load it into an R tibble using:
238 | 
239 | ```{r}
240 | # The qp3Pop result lines are padded with runs of spaces. read_table() splits on any run
241 | # of whitespace, whereas read_delim(delim = " ") treats every extra space as an empty field.
242 | d <- readr::read_table(
243 |   "f3_outgroup_stats_Han.txt",
244 |   col_names = c("dummy", "A", "B", "C", "F3", "StdErr", "Z", "SNPS")
245 | )
246 | ```
247 | 
248 | We can check that it worked:
249 | 
250 | ```{r}
251 | d  # print the loaded table to check that the columns were parsed as expected
252 | ```
253 | 
254 | ```{r}
255 | library(ggplot2)
256 | 
257 | # One row per test population (column B), ordered along the y axis by its F3 value;
258 | # the horizontal bars extend one standard error to either side of the point estimate.
259 | ggplot(
260 |   data = d,
261 |   mapping = aes(y = forcats::fct_reorder(B, F3))
262 | ) +
263 |   geom_errorbarh(
264 |     mapping = aes(xmin = F3 - StdErr, xmax = F3 + StdErr)
265 |   ) +
266 |   geom_point(
267 |     mapping = aes(x = F3)
268 |   ) +
269 |   xlab("F3(Han, Test; Mbuti)")
270 | ```
271 | 
272 | As expected, the ancient samples and modern Saami are the ones with the highest allele sharing with present-day East Asians (as represented by Han) compared to many other Europeans.
273 | 
274 | ## Outgroup F3 Statistics Scatter plot
275 | 
276 | The above plot shows an intriguing cline of differential relatedness to Han in many Europeans. For example, would you have guessed that Icelandics are closer to Han than Armenians are to Han? This is very surprising, and it shows that European ancestry has a complex relationship to East Asians. To understand this better, you can read [Patterson 2012](http://www.genetics.org/content/early/2012/09/06/genetics.112.145037), who makes some intriguing observations. Patterson and colleagues use Admixture F3 statistics and apply it to many populations world-wide. They summarise some population triples with the most negative F3 statistics in the following table:
277 | 
278 | <img src="img/Patterson_2012_table.png" alt="Patterson 2012 table" style="width: 450px;"/>
279 | 
280 | There are many interesting results here, but one of the most striking is the finding of F3(Sardinian, Karitiana; French), which is highly significantly negative. This statistic implies that French are admixed between Sardinians and Karitiana, a Native American population from Brazil. How is that possible? We can of course rule out any recent Native American backflow into Europe.
281 | 
282 | Patterson and colleagues explained this finding by hypothesising an ancient admixture event, from a Siberian population that contributed to both Europeans and to Native Americans. They termed that population the “Ancient North Eurasians (ANE)”. The following admixture graph was suggested:
283 | 
284 | <img src="img/Patterson_2012_ANEfig.png" alt="Patterson 2012 ANE graph" style="width: 600px;"/>
285 | 
286 | As you can see, the idea is that modern Central Europeans, such as French, are admixed between Southern Europeans (Sardinians) and ANE. The Ancient North Eurasians are a classic example of a “Ghost” population, a population which does not exist anymore in unmixed form, and from which we have no direct individual representative.
287 | 
288 | Amazingly, two years after the publication of [Patterson 2012](http://www.genetics.org/content/early/2012/09/06/genetics.112.145037), the ANE ghost population was actually found: in 2014, [Raghavan et al.](https://www.nature.com/articles/nature12736) published a paper called “Upper Palaeolithic Siberian genome reveals dual ancestry of Native Americans”. A 24,000-year-old boy (called MA1) from the site of “Mal’ta” in Siberia was shown to have close genetic affinity with both Europeans and, in particular, Native Americans, just as proposed in [Patterson 2012](http://www.genetics.org/content/early/2012/09/06/genetics.112.145037).
289 | 
290 | The affinities are summarised nicely in this figure from [Raghavan et al.](https://www.nature.com/articles/nature12736):
291 | 
292 | <img src="img/MA1_affinities.png" alt="MA1 Affinities" style="width: 600px;"/>
293 | 
294 | OK, so we now know that ancestry related to Native Americans contributed to European populations. Could that possibly explain the affinity of our ancient samples and Saami to Han Chinese in some way? To test this, we will run the same Outgroup F3 statistics as above, but this time with MA1 instead of Han as the fixed comparison population. Specifically, we run the following population triples in `qp3Pop`:
295 | 
296 | ```
297 | MA1_HG.SG Chuvash Mbuti
298 | MA1_HG.SG Albanian Mbuti
299 | MA1_HG.SG Armenian Mbuti
300 | MA1_HG.SG Bulgarian Mbuti
301 | MA1_HG.SG Czech Mbuti
302 | MA1_HG.SG Druze Mbuti
303 | MA1_HG.SG English Mbuti
304 | MA1_HG.SG Estonian Mbuti
305 | MA1_HG.SG Finnish Mbuti
306 | MA1_HG.SG French Mbuti
307 | MA1_HG.SG Georgian Mbuti
308 | MA1_HG.SG Greek Mbuti
309 | MA1_HG.SG Hungarian Mbuti
310 | MA1_HG.SG Icelandic Mbuti
311 | MA1_HG.SG Italian_North Mbuti
312 | MA1_HG.SG Italian_South Mbuti
313 | MA1_HG.SG Lithuanian Mbuti
314 | MA1_HG.SG Maltese Mbuti
315 | MA1_HG.SG Mordovian Mbuti
316 | MA1_HG.SG Norwegian Mbuti
317 | MA1_HG.SG Orcadian Mbuti
318 | MA1_HG.SG Russian Mbuti
319 | MA1_HG.SG Sardinian Mbuti
320 | MA1_HG.SG Scottish Mbuti
321 | MA1_HG.SG Sicilian Mbuti
322 | MA1_HG.SG Spanish_North Mbuti
323 | MA1_HG.SG Spanish Mbuti
324 | MA1_HG.SG Ukrainian Mbuti
325 | MA1_HG.SG Levanluhta Mbuti
326 | MA1_HG.SG BolshoyOleniOstrov Mbuti
327 | MA1_HG.SG ChalmnyVarre Mbuti
328 | MA1_HG.SG Saami.DG Mbuti
329 | ```
330 | 
331 | Here, `MA1_HG.SG` is the somewhat cryptic population name for the MA1 individual.
332 | 
333 | **Exercise:** Follow the same protocol as above: Copy the list into a file, prepare a parameter file for `qp3Pop` with that population triple list, and run `qp3Pop`. Copy the results (all lines beginning with “result:”) into a file named "f3_outgroup_stats_MA1.txt".
334 | 
335 | To test in what way the relationship to Han Chinese is correlated with the relationship with MA1, we will now plot the two statistics against each other in a scatter plot. We first have to merge the two outgroup-F3 datasets together. Here is the code including loading (assuming that the two F3 dataframes are called `outgroupf3dat_Han` and `outgroupf3dat_MA1`):
336 | 
337 | ```{r}
338 | # Column layout of the copied qp3Pop "result:" lines; "dummy" takes the leading "result:" field.
339 | f3_columns <- c("dummy", "A", "B", "C", "F3", "stderr", "Z", "nSNPs")
340 | 
341 | # Both result files share this layout, so one helper reads either of them.
342 | read_f3_stats <- function(path) {
343 |   readr::read_delim(path, delim = " ", trim_ws = TRUE, col_names = f3_columns)
344 | }
345 | 
346 | outgroupf3dat_Han <- read_f3_stats("f3_outgroup_stats_Han.txt")
347 | outgroupf3dat_MA1 <- read_f3_stats("f3_outgroup_stats_MA1.txt")
348 | 
349 | # Join on the test population (column B). All remaining columns exist in
350 | # both tables and therefore receive a suffix, which yields the F3_Han and
351 | # F3_MA1 columns used for plotting. A full join keeps unmatched rows as NA.
352 | outgroupf3dat_merged <- dplyr::full_join(
353 |   outgroupf3dat_Han,
354 |   outgroupf3dat_MA1,
355 |   by = "B",
356 |   suffix = c("_Han", "_MA1")
357 | )
358 | ```
359 | 
360 | Again, we check that everything worked:
361 | 
362 | ```{r}
363 | outgroupf3dat_merged  # top-level expression: prints the joined table, including the suffixed F3_Han / F3_MA1 columns
364 | ```
365 | 
366 | Now we can make a scatter plot:
367 | 
368 | ```{r}
369 | # One point per joined row: F3 with Han on the x axis, F3 with MA1 on the y axis.
370 | ggplot(
371 |   data = outgroupf3dat_merged,
372 |   mapping = aes(x = F3_Han, y = F3_MA1)
373 | ) +
374 |   geom_point() +
375 |   labs(
376 |     x = "F3(Test, Han; Mbuti)",
377 |     y = "F3(Test, MA1; Mbuti)"
378 |   )
379 | ```
380 | 
381 | This isn’t very useful, however, as we cannot see which point is which population. We use the `geom_label_repel` function from ggrepel to add text labels to each point:
382 | 
383 | ```{r}
384 | # Same scatter plot of F3_Han against F3_MA1, now with one repelled text
385 | # label per point showing the test population name (column B).
386 | ggplot(
387 |   data = outgroupf3dat_merged,
388 |   mapping = aes(x = F3_Han, y = F3_MA1)
389 | ) +
390 |   geom_point() +
391 |   labs(
392 |     x = "F3(Test, Han; Mbuti)",
393 |     y = "F3(Test, MA1; Mbuti)"
394 |   ) +
395 |   # x and y come from the plot-level mapping; only the label is added here.
396 |   ggrepel::geom_label_repel(
397 |     mapping = aes(
398 |       label = B
399 |     )
400 |   )
401 | ```
402 | 
403 | The result shows that indeed the affinity to East Asians in the bulk of European countries can be explained by MA1-related ancestry. Most European countries have a linear relationship between their affinity to Han and their affinity to MA1. However, this is not true for our ancient samples from Fennoscandia and for modern Saami and Chuvash, who have extra affinity to Han not explained by MA1 ([Lazaridis et al. 2014](https://www.nature.com/articles/nature13673)).
404 | 
405 | Now, why there is a connection between MA1 and Han is not trivial to explain. The most probable explanation involves "Basal Eurasian" ancestry, which happens to be anti-correlated to MA1-ancestry in Europe, and which drives those populations with high "Basal Eurasian" ancestry further away from Han. See [Lazaridis et al. 2014](https://www.nature.com/articles/nature13673) for more details.
406 | 


--------------------------------------------------------------------------------
/pca.AllEurasia.eval:
--------------------------------------------------------------------------------
   1 |    71.715980
   2 |     9.646096
   3 |     6.317298
   4 |     3.871764
   5 |     3.075333
   6 |     2.646867
   7 |     2.454284
   8 |     2.236205
   9 |     2.006447
  10 |     2.000151
  11 |     1.949834
  12 |     1.946900
  13 |     1.913285
  14 |     1.909476
  15 |     1.886648
  16 |     1.873806
  17 |     1.865762
  18 |     1.844984
  19 |     1.826839
  20 |     1.818292
  21 |     1.813557
  22 |     1.806923
  23 |     1.794645
  24 |     1.787562
  25 |     1.764851
  26 |     1.756468
  27 |     1.741944
  28 |     1.735706
  29 |     1.725703
  30 |     1.721535
  31 |     1.718770
  32 |     1.712666
  33 |     1.699156
  34 |     1.689564
  35 |     1.685036
  36 |     1.677270
  37 |     1.672807
  38 |     1.666883
  39 |     1.660374
  40 |     1.653085
  41 |     1.650461
  42 |     1.643042
  43 |     1.636804
  44 |     1.634764
  45 |     1.628574
  46 |     1.623305
  47 |     1.615182
  48 |     1.601129
  49 |     1.595264
  50 |     1.583416
  51 |     1.576798
  52 |     1.574901
  53 |     1.566180
  54 |     1.559824
  55 |     1.556275
  56 |     1.549500
  57 |     1.542913
  58 |     1.540315
  59 |     1.531804
  60 |     1.528939
  61 |     1.521507
  62 |     1.515997
  63 |     1.512632
  64 |     1.512015
  65 |     1.507849
  66 |     1.499872
  67 |     1.496241
  68 |     1.493827
  69 |     1.486508
  70 |     1.483965
  71 |     1.478250
  72 |     1.469645
  73 |     1.467586
  74 |     1.458038
  75 |     1.452991
  76 |     1.449844
  77 |     1.449182
  78 |     1.446437
  79 |     1.438036
  80 |     1.433108
  81 |     1.431148
  82 |     1.428462
  83 |     1.421567
  84 |     1.417396
  85 |     1.416161
  86 |     1.411774
  87 |     1.410701
  88 |     1.406160
  89 |     1.405373
  90 |     1.399438
  91 |     1.393987
  92 |     1.388933
  93 |     1.384344
  94 |     1.381818
  95 |     1.379500
  96 |     1.374324
  97 |     1.367127
  98 |     1.364338
  99 |     1.354982
 100 |     1.351974
 101 |     1.350672
 102 |     1.348083
 103 |     1.345424
 104 |     1.341461
 105 |     1.337633
 106 |     1.333327
 107 |     1.330888
 108 |     1.328745
 109 |     1.325143
 110 |     1.321715
 111 |     1.318676
 112 |     1.318241
 113 |     1.314366
 114 |     1.311146
 115 |     1.308522
 116 |     1.306540
 117 |     1.304946
 118 |     1.301508
 119 |     1.297752
 120 |     1.297505
 121 |     1.295093
 122 |     1.291474
 123 |     1.289045
 124 |     1.286803
 125 |     1.284147
 126 |     1.282275
 127 |     1.280667
 128 |     1.279286
 129 |     1.276059
 130 |     1.275587
 131 |     1.273570
 132 |     1.272469
 133 |     1.269639
 134 |     1.266366
 135 |     1.263176
 136 |     1.260103
 137 |     1.256532
 138 |     1.254596
 139 |     1.252190
 140 |     1.250768
 141 |     1.249041
 142 |     1.244996
 143 |     1.244038
 144 |     1.242172
 145 |     1.238261
 146 |     1.236299
 147 |     1.234178
 148 |     1.231280
 149 |     1.229033
 150 |     1.228112
 151 |     1.226033
 152 |     1.221697
 153 |     1.219402
 154 |     1.218948
 155 |     1.217277
 156 |     1.215639
 157 |     1.213122
 158 |     1.208787
 159 |     1.207875
 160 |     1.205892
 161 |     1.204942
 162 |     1.203346
 163 |     1.200594
 164 |     1.198044
 165 |     1.195350
 166 |     1.193223
 167 |     1.191716
 168 |     1.190070
 169 |     1.189331
 170 |     1.187069
 171 |     1.185844
 172 |     1.183531
 173 |     1.181102
 174 |     1.180088
 175 |     1.177938
 176 |     1.175316
 177 |     1.174158
 178 |     1.171976
 179 |     1.170660
 180 |     1.167873
 181 |     1.165652
 182 |     1.164344
 183 |     1.163405
 184 |     1.159586
 185 |     1.158722
 186 |     1.158274
 187 |     1.156118
 188 |     1.154081
 189 |     1.152420
 190 |     1.151642
 191 |     1.149088
 192 |     1.147936
 193 |     1.146993
 194 |     1.144313
 195 |     1.141345
 196 |     1.137728
 197 |     1.137247
 198 |     1.136279
 199 |     1.135313
 200 |     1.133580
 201 |     1.131784
 202 |     1.131469
 203 |     1.129439
 204 |     1.127615
 205 |     1.126348
 206 |     1.125415
 207 |     1.124355
 208 |     1.121443
 209 |     1.120501
 210 |     1.119133
 211 |     1.118446
 212 |     1.117300
 213 |     1.116887
 214 |     1.115978
 215 |     1.113715
 216 |     1.112566
 217 |     1.112342
 218 |     1.109144
 219 |     1.107941
 220 |     1.106964
 221 |     1.105361
 222 |     1.105044
 223 |     1.102753
 224 |     1.101930
 225 |     1.100408
 226 |     1.099652
 227 |     1.098429
 228 |     1.098332
 229 |     1.098243
 230 |     1.094135
 231 |     1.093516
 232 |     1.092382
 233 |     1.091670
 234 |     1.090078
 235 |     1.089586
 236 |     1.088270
 237 |     1.086303
 238 |     1.085263
 239 |     1.084290
 240 |     1.083358
 241 |     1.082818
 242 |     1.082273
 243 |     1.080266
 244 |     1.079481
 245 |     1.077849
 246 |     1.076985
 247 |     1.076192
 248 |     1.076136
 249 |     1.073774
 250 |     1.072358
 251 |     1.072124
 252 |     1.071058
 253 |     1.069525
 254 |     1.069366
 255 |     1.067774
 256 |     1.067285
 257 |     1.065857
 258 |     1.064099
 259 |     1.063845
 260 |     1.062725
 261 |     1.061943
 262 |     1.060416
 263 |     1.060043
 264 |     1.059428
 265 |     1.058306
 266 |     1.057950
 267 |     1.057505
 268 |     1.057060
 269 |     1.054535
 270 |     1.053238
 271 |     1.053102
 272 |     1.052623
 273 |     1.051572
 274 |     1.050980
 275 |     1.050070
 276 |     1.049240
 277 |     1.047290
 278 |     1.046165
 279 |     1.045326
 280 |     1.044794
 281 |     1.043890
 282 |     1.043100
 283 |     1.042825
 284 |     1.041543
 285 |     1.040521
 286 |     1.038824
 287 |     1.038218
 288 |     1.037671
 289 |     1.036877
 290 |     1.036013
 291 |     1.035027
 292 |     1.034440
 293 |     1.033807
 294 |     1.032238
 295 |     1.031766
 296 |     1.030900
 297 |     1.029723
 298 |     1.029454
 299 |     1.029267
 300 |     1.028585
 301 |     1.027856
 302 |     1.027158
 303 |     1.026376
 304 |     1.025794
 305 |     1.024513
 306 |     1.024265
 307 |     1.022640
 308 |     1.022056
 309 |     1.022035
 310 |     1.021338
 311 |     1.020752
 312 |     1.019220
 313 |     1.018966
 314 |     1.018401
 315 |     1.016882
 316 |     1.016381
 317 |     1.016260
 318 |     1.015723
 319 |     1.015156
 320 |     1.013542
 321 |     1.013257
 322 |     1.012991
 323 |     1.011783
 324 |     1.011264
 325 |     1.010738
 326 |     1.009866
 327 |     1.009583
 328 |     1.008919
 329 |     1.007864
 330 |     1.007229
 331 |     1.006901
 332 |     1.005933
 333 |     1.005583
 334 |     1.004380
 335 |     1.003731
 336 |     1.003455
 337 |     1.002697
 338 |     1.002511
 339 |     1.001233
 340 |     1.000980
 341 |     1.000107
 342 |     0.999920
 343 |     0.999383
 344 |     0.998479
 345 |     0.997897
 346 |     0.997478
 347 |     0.997201
 348 |     0.995749
 349 |     0.995228
 350 |     0.994264
 351 |     0.993564
 352 |     0.993059
 353 |     0.992377
 354 |     0.991550
 355 |     0.991430
 356 |     0.990713
 357 |     0.990020
 358 |     0.989282
 359 |     0.989015
 360 |     0.988423
 361 |     0.988142
 362 |     0.987656
 363 |     0.986261
 364 |     0.985854
 365 |     0.985454
 366 |     0.985131
 367 |     0.984279
 368 |     0.983098
 369 |     0.982697
 370 |     0.982245
 371 |     0.981888
 372 |     0.981737
 373 |     0.981172
 374 |     0.980173
 375 |     0.979647
 376 |     0.979371
 377 |     0.978758
 378 |     0.978358
 379 |     0.977391
 380 |     0.976937
 381 |     0.976641
 382 |     0.976026
 383 |     0.975142
 384 |     0.974387
 385 |     0.973590
 386 |     0.973458
 387 |     0.973009
 388 |     0.972056
 389 |     0.971135
 390 |     0.970599
 391 |     0.970517
 392 |     0.969697
 393 |     0.969303
 394 |     0.968879
 395 |     0.968092
 396 |     0.967964
 397 |     0.967065
 398 |     0.966825
 399 |     0.966743
 400 |     0.965838
 401 |     0.965401
 402 |     0.964752
 403 |     0.963642
 404 |     0.963346
 405 |     0.962434
 406 |     0.962165
 407 |     0.961905
 408 |     0.961024
 409 |     0.960495
 410 |     0.959737
 411 |     0.959140
 412 |     0.959096
 413 |     0.958226
 414 |     0.957956
 415 |     0.957269
 416 |     0.956886
 417 |     0.956086
 418 |     0.955981
 419 |     0.955657
 420 |     0.955189
 421 |     0.954771
 422 |     0.953468
 423 |     0.953362
 424 |     0.953062
 425 |     0.952075
 426 |     0.951706
 427 |     0.951235
 428 |     0.950837
 429 |     0.950302
 430 |     0.949604
 431 |     0.949190
 432 |     0.948684
 433 |     0.948069
 434 |     0.947813
 435 |     0.947164
 436 |     0.946304
 437 |     0.945771
 438 |     0.945406
 439 |     0.944962
 440 |     0.944757
 441 |     0.944197
 442 |     0.943876
 443 |     0.942923
 444 |     0.942592
 445 |     0.942162
 446 |     0.941549
 447 |     0.941221
 448 |     0.940900
 449 |     0.940533
 450 |     0.939559
 451 |     0.939265
 452 |     0.939117
 453 |     0.938712
 454 |     0.938331
 455 |     0.938069
 456 |     0.937496
 457 |     0.936374
 458 |     0.936015
 459 |     0.935724
 460 |     0.935130
 461 |     0.934824
 462 |     0.934040
 463 |     0.933420
 464 |     0.933205
 465 |     0.932738
 466 |     0.932193
 467 |     0.931963
 468 |     0.931454
 469 |     0.931035
 470 |     0.930492
 471 |     0.929848
 472 |     0.929349
 473 |     0.929121
 474 |     0.928145
 475 |     0.927946
 476 |     0.927775
 477 |     0.927228
 478 |     0.926476
 479 |     0.925830
 480 |     0.924999
 481 |     0.924882
 482 |     0.924624
 483 |     0.924254
 484 |     0.923437
 485 |     0.922936
 486 |     0.922757
 487 |     0.922369
 488 |     0.921947
 489 |     0.921621
 490 |     0.920983
 491 |     0.920648
 492 |     0.920081
 493 |     0.919799
 494 |     0.919478
 495 |     0.919088
 496 |     0.918109
 497 |     0.917490
 498 |     0.917307
 499 |     0.916769
 500 |     0.916590
 501 |     0.915881
 502 |     0.915463
 503 |     0.915134
 504 |     0.914584
 505 |     0.914211
 506 |     0.913969
 507 |     0.913261
 508 |     0.913220
 509 |     0.912676
 510 |     0.912265
 511 |     0.911897
 512 |     0.911728
 513 |     0.911222
 514 |     0.910678
 515 |     0.910456
 516 |     0.910011
 517 |     0.909571
 518 |     0.909092
 519 |     0.908675
 520 |     0.908044
 521 |     0.907784
 522 |     0.907319
 523 |     0.907030
 524 |     0.906959
 525 |     0.906055
 526 |     0.905717
 527 |     0.905481
 528 |     0.905318
 529 |     0.904515
 530 |     0.904088
 531 |     0.903689
 532 |     0.902584
 533 |     0.902340
 534 |     0.902202
 535 |     0.901933
 536 |     0.901337
 537 |     0.900931
 538 |     0.900531
 539 |     0.899700
 540 |     0.899510
 541 |     0.899448
 542 |     0.898828
 543 |     0.898442
 544 |     0.897904
 545 |     0.897538
 546 |     0.896594
 547 |     0.896224
 548 |     0.896043
 549 |     0.895678
 550 |     0.895322
 551 |     0.894900
 552 |     0.894500
 553 |     0.894205
 554 |     0.893792
 555 |     0.893110
 556 |     0.892631
 557 |     0.892244
 558 |     0.891665
 559 |     0.891467
 560 |     0.890949
 561 |     0.890557
 562 |     0.890284
 563 |     0.890120
 564 |     0.889878
 565 |     0.888517
 566 |     0.888418
 567 |     0.888256
 568 |     0.887652
 569 |     0.887366
 570 |     0.886798
 571 |     0.886249
 572 |     0.885959
 573 |     0.885612
 574 |     0.885271
 575 |     0.885018
 576 |     0.884410
 577 |     0.884006
 578 |     0.883420
 579 |     0.882979
 580 |     0.882828
 581 |     0.882098
 582 |     0.881565
 583 |     0.881310
 584 |     0.881150
 585 |     0.880599
 586 |     0.880347
 587 |     0.879740
 588 |     0.879552
 589 |     0.878985
 590 |     0.878718
 591 |     0.878483
 592 |     0.878217
 593 |     0.877969
 594 |     0.877166
 595 |     0.876720
 596 |     0.876588
 597 |     0.875999
 598 |     0.875724
 599 |     0.875312
 600 |     0.875060
 601 |     0.874626
 602 |     0.874394
 603 |     0.873977
 604 |     0.873148
 605 |     0.872821
 606 |     0.872459
 607 |     0.872096
 608 |     0.871806
 609 |     0.871684
 610 |     0.871358
 611 |     0.871084
 612 |     0.870736
 613 |     0.870239
 614 |     0.869744
 615 |     0.869448
 616 |     0.868877
 617 |     0.868834
 618 |     0.868298
 619 |     0.867668
 620 |     0.867114
 621 |     0.866990
 622 |     0.866831
 623 |     0.866211
 624 |     0.865688
 625 |     0.865635
 626 |     0.864710
 627 |     0.864149
 628 |     0.864141
 629 |     0.863711
 630 |     0.862948
 631 |     0.862400
 632 |     0.862224
 633 |     0.861785
 634 |     0.861445
 635 |     0.861259
 636 |     0.861043
 637 |     0.860382
 638 |     0.860117
 639 |     0.859707
 640 |     0.859216
 641 |     0.859209
 642 |     0.858204
 643 |     0.858035
 644 |     0.857629
 645 |     0.857090
 646 |     0.857034
 647 |     0.856552
 648 |     0.855977
 649 |     0.855882
 650 |     0.855628
 651 |     0.855513
 652 |     0.854912
 653 |     0.854641
 654 |     0.854036
 655 |     0.853970
 656 |     0.853373
 657 |     0.852897
 658 |     0.852067
 659 |     0.852048
 660 |     0.851803
 661 |     0.851459
 662 |     0.851016
 663 |     0.850728
 664 |     0.850291
 665 |     0.849942
 666 |     0.849572
 667 |     0.849281
 668 |     0.848894
 669 |     0.848685
 670 |     0.848422
 671 |     0.848015
 672 |     0.847328
 673 |     0.847273
 674 |     0.846640
 675 |     0.846226
 676 |     0.845960
 677 |     0.845485
 678 |     0.845159
 679 |     0.844546
 680 |     0.844416
 681 |     0.844113
 682 |     0.843630
 683 |     0.843193
 684 |     0.842665
 685 |     0.842379
 686 |     0.842040
 687 |     0.841749
 688 |     0.841546
 689 |     0.841290
 690 |     0.841188
 691 |     0.840506
 692 |     0.839818
 693 |     0.839536
 694 |     0.839376
 695 |     0.838980
 696 |     0.838796
 697 |     0.838279
 698 |     0.837635
 699 |     0.837285
 700 |     0.836838
 701 |     0.836294
 702 |     0.836187
 703 |     0.835985
 704 |     0.835624
 705 |     0.835082
 706 |     0.834812
 707 |     0.834301
 708 |     0.834018
 709 |     0.833686
 710 |     0.833486
 711 |     0.833046
 712 |     0.832747
 713 |     0.832353
 714 |     0.832011
 715 |     0.831617
 716 |     0.831215
 717 |     0.830883
 718 |     0.830429
 719 |     0.829964
 720 |     0.829774
 721 |     0.829540
 722 |     0.829070
 723 |     0.828846
 724 |     0.828117
 725 |     0.827983
 726 |     0.827625
 727 |     0.827316
 728 |     0.827115
 729 |     0.826908
 730 |     0.826476
 731 |     0.825891
 732 |     0.825584
 733 |     0.825149
 734 |     0.825076
 735 |     0.824591
 736 |     0.824412
 737 |     0.823907
 738 |     0.823624
 739 |     0.823109
 740 |     0.823052
 741 |     0.822477
 742 |     0.822333
 743 |     0.821695
 744 |     0.821324
 745 |     0.820815
 746 |     0.820577
 747 |     0.820041
 748 |     0.819847
 749 |     0.819615
 750 |     0.819072
 751 |     0.818881
 752 |     0.818542
 753 |     0.818240
 754 |     0.818033
 755 |     0.817741
 756 |     0.817351
 757 |     0.816811
 758 |     0.816287
 759 |     0.815814
 760 |     0.815423
 761 |     0.815192
 762 |     0.815034
 763 |     0.814883
 764 |     0.814052
 765 |     0.813897
 766 |     0.813726
 767 |     0.813660
 768 |     0.812896
 769 |     0.812774
 770 |     0.812149
 771 |     0.811883
 772 |     0.811682
 773 |     0.811341
 774 |     0.811214
 775 |     0.811013
 776 |     0.810373
 777 |     0.810169
 778 |     0.809624
 779 |     0.809076
 780 |     0.808794
 781 |     0.808444
 782 |     0.808326
 783 |     0.808179
 784 |     0.807618
 785 |     0.807567
 786 |     0.807352
 787 |     0.806826
 788 |     0.806653
 789 |     0.806221
 790 |     0.805727
 791 |     0.805221
 792 |     0.804998
 793 |     0.804585
 794 |     0.804224
 795 |     0.803660
 796 |     0.803305
 797 |     0.803221
 798 |     0.802845
 799 |     0.802669
 800 |     0.802409
 801 |     0.801995
 802 |     0.801480
 803 |     0.801126
 804 |     0.800777
 805 |     0.800579
 806 |     0.800107
 807 |     0.799610
 808 |     0.799097
 809 |     0.798955
 810 |     0.798845
 811 |     0.798418
 812 |     0.797948
 813 |     0.797613
 814 |     0.797464
 815 |     0.796897
 816 |     0.796723
 817 |     0.796541
 818 |     0.795860
 819 |     0.795637
 820 |     0.795418
 821 |     0.795167
 822 |     0.794763
 823 |     0.794421
 824 |     0.793827
 825 |     0.793678
 826 |     0.793548
 827 |     0.793303
 828 |     0.792505
 829 |     0.792223
 830 |     0.791879
 831 |     0.791164
 832 |     0.790971
 833 |     0.790681
 834 |     0.790180
 835 |     0.789786
 836 |     0.789691
 837 |     0.789369
 838 |     0.788991
 839 |     0.788721
 840 |     0.788559
 841 |     0.788323
 842 |     0.788091
 843 |     0.787413
 844 |     0.786945
 845 |     0.786669
 846 |     0.786279
 847 |     0.786021
 848 |     0.785453
 849 |     0.785168
 850 |     0.784955
 851 |     0.784383
 852 |     0.784065
 853 |     0.783717
 854 |     0.783495
 855 |     0.783116
 856 |     0.782517
 857 |     0.782418
 858 |     0.781996
 859 |     0.781478
 860 |     0.781150
 861 |     0.780929
 862 |     0.780612
 863 |     0.780346
 864 |     0.779740
 865 |     0.779687
 866 |     0.779626
 867 |     0.779090
 868 |     0.778778
 869 |     0.778558
 870 |     0.778293
 871 |     0.778082
 872 |     0.777478
 873 |     0.777164
 874 |     0.777004
 875 |     0.776450
 876 |     0.776249
 877 |     0.776016
 878 |     0.775638
 879 |     0.775471
 880 |     0.775117
 881 |     0.774738
 882 |     0.774340
 883 |     0.773849
 884 |     0.773749
 885 |     0.773193
 886 |     0.772833
 887 |     0.772437
 888 |     0.772363
 889 |     0.771980
 890 |     0.771546
 891 |     0.770945
 892 |     0.770807
 893 |     0.770712
 894 |     0.770284
 895 |     0.769755
 896 |     0.769364
 897 |     0.768872
 898 |     0.768608
 899 |     0.768006
 900 |     0.767707
 901 |     0.767287
 902 |     0.766956
 903 |     0.766804
 904 |     0.766640
 905 |     0.766513
 906 |     0.765853
 907 |     0.765604
 908 |     0.765247
 909 |     0.765033
 910 |     0.764525
 911 |     0.763868
 912 |     0.763589
 913 |     0.763303
 914 |     0.763255
 915 |     0.762772
 916 |     0.762657
 917 |     0.762382
 918 |     0.761943
 919 |     0.761652
 920 |     0.761166
 921 |     0.760886
 922 |     0.760642
 923 |     0.760246
 924 |     0.759796
 925 |     0.759547
 926 |     0.759167
 927 |     0.758572
 928 |     0.758437
 929 |     0.758402
 930 |     0.757537
 931 |     0.757399
 932 |     0.757261
 933 |     0.757044
 934 |     0.756354
 935 |     0.756024
 936 |     0.755860
 937 |     0.755357
 938 |     0.755136
 939 |     0.754750
 940 |     0.754214
 941 |     0.754005
 942 |     0.753724
 943 |     0.752996
 944 |     0.752836
 945 |     0.752400
 946 |     0.752306
 947 |     0.751759
 948 |     0.751661
 949 |     0.751330
 950 |     0.751168
 951 |     0.751020
 952 |     0.750659
 953 |     0.750007
 954 |     0.749689
 955 |     0.749495
 956 |     0.749119
 957 |     0.748759
 958 |     0.748478
 959 |     0.748065
 960 |     0.747625
 961 |     0.747449
 962 |     0.746687
 963 |     0.746264
 964 |     0.746058
 965 |     0.745328
 966 |     0.744984
 967 |     0.744437
 968 |     0.744369
 969 |     0.744013
 970 |     0.743688
 971 |     0.743510
 972 |     0.743082
 973 |     0.742683
 974 |     0.742467
 975 |     0.742365
 976 |     0.742243
 977 |     0.741437
 978 |     0.741378
 979 |     0.740992
 980 |     0.740443
 981 |     0.740272
 982 |     0.739879
 983 |     0.739771
 984 |     0.739407
 985 |     0.739154
 986 |     0.738702
 987 |     0.738091
 988 |     0.737694
 989 |     0.737644
 990 |     0.737240
 991 |     0.736978
 992 |     0.736598
 993 |     0.736027
 994 |     0.735746
 995 |     0.735229
 996 |     0.734727
 997 |     0.734338
 998 |     0.734315
 999 |     0.734027
1000 |     0.733939
1001 |     0.733623
1002 |     0.733333
1003 |     0.732575
1004 |     0.732473
1005 |     0.732394
1006 |     0.732105
1007 |     0.731576
1008 |     0.731172
1009 |     0.731164
1010 |     0.730483
1011 |     0.730308
1012 |     0.729689
1013 |     0.729551
1014 |     0.729166
1015 |     0.728784
1016 |     0.728480
1017 |     0.728378
1018 |     0.728030
1019 |     0.727821
1020 |     0.727293
1021 |     0.726755
1022 |     0.726355
1023 |     0.726085
1024 |     0.725702
1025 |     0.725261
1026 |     0.724964
1027 |     0.724439
1028 |     0.724318
1029 |     0.723856
1030 |     0.723428
1031 |     0.722977
1032 |     0.722882
1033 |     0.722623
1034 |     0.722001
1035 |     0.721677
1036 |     0.721337
1037 |     0.720897
1038 |     0.720502
1039 |     0.720374
1040 |     0.719693
1041 |     0.719497
1042 |     0.719073
1043 |     0.718917
1044 |     0.718117
1045 |     0.717986
1046 |     0.717598
1047 |     0.716888
1048 |     0.716489
1049 |     0.716429
1050 |     0.716233
1051 |     0.715416
1052 |     0.714722
1053 |     0.714685
1054 |     0.714282
1055 |     0.714057
1056 |     0.713964
1057 |     0.713437
1058 |     0.713171
1059 |     0.712531
1060 |     0.712118
1061 |     0.711659
1062 |     0.711530
1063 |     0.711407
1064 |     0.711102
1065 |     0.710655
1066 |     0.710381
1067 |     0.709936
1068 |     0.709708
1069 |     0.709649
1070 |     0.708671
1071 |     0.708125
1072 |     0.707835
1073 |     0.707500
1074 |     0.707042
1075 |     0.706892
1076 |     0.706112
1077 |     0.706009
1078 |     0.705823
1079 |     0.705104
1080 |     0.704860
1081 |     0.704239
1082 |     0.703932
1083 |     0.703477
1084 |     0.703391
1085 |     0.702766
1086 |     0.702445
1087 |     0.702274
1088 |     0.701940
1089 |     0.701665
1090 |     0.700902
1091 |     0.700590
1092 |     0.700421
1093 |     0.700069
1094 |     0.699486
1095 |     0.699260
1096 |     0.698845
1097 |     0.698525
1098 |     0.698164
1099 |     0.697589
1100 |     0.697331
1101 |     0.697240
1102 |     0.696655
1103 |     0.695899
1104 |     0.695641
1105 |     0.695463
1106 |     0.695160
1107 |     0.695045
1108 |     0.693824
1109 |     0.693280
1110 |     0.693161
1111 |     0.692488
1112 |     0.692260
1113 |     0.691847
1114 |     0.691383
1115 |     0.691129
1116 |     0.690650
1117 |     0.690521
1118 |     0.690001
1119 |     0.689281
1120 |     0.689164
1121 |     0.688493
1122 |     0.688327
1123 |     0.687731
1124 |     0.687431
1125 |     0.686862
1126 |     0.686461
1127 |     0.686314
1128 |     0.685825
1129 |     0.685344
1130 |     0.684978
1131 |     0.684476
1132 |     0.684136
1133 |     0.683983
1134 |     0.683262
1135 |     0.683166
1136 |     0.682451
1137 |     0.682255
1138 |     0.681984
1139 |     0.681503
1140 |     0.680910
1141 |     0.680809
1142 |     0.679644
1143 |     0.679039
1144 |     0.678826
1145 |     0.678661
1146 |     0.678207
1147 |     0.677428
1148 |     0.677068
1149 |     0.676767
1150 |     0.675457
1151 |     0.675406
1152 |     0.675308
1153 |     0.674391
1154 |     0.674118
1155 |     0.673968
1156 |     0.673589
1157 |     0.672906
1158 |     0.672320
1159 |     0.671887
1160 |     0.671262
1161 |     0.670848
1162 |     0.670659
1163 |     0.670100
1164 |     0.669492
1165 |     0.668869
1166 |     0.668393
1167 |     0.667479
1168 |     0.667404
1169 |     0.666943
1170 |     0.666272
1171 |     0.665854
1172 |     0.665310
1173 |     0.665164
1174 |     0.664333
1175 |     0.663281
1176 |     0.662910
1177 |     0.662375
1178 |     0.661634
1179 |     0.661110
1180 |     0.660643
1181 |     0.660077
1182 |     0.659467
1183 |     0.658888
1184 |     0.658330
1185 |     0.658068
1186 |     0.657553
1187 |     0.657307
1188 |     0.656757
1189 |     0.655750
1190 |     0.654891
1191 |     0.654242
1192 |     0.653675
1193 |     0.653497
1194 |     0.653385
1195 |     0.652308
1196 |     0.651295
1197 |     0.650471
1198 |     0.649780
1199 |     0.649056
1200 |     0.648642
1201 |     0.647491
1202 |     0.647090
1203 |     0.646993
1204 |     0.645525
1205 |     0.644970
1206 |     0.644049
1207 |     0.642704
1208 |     0.642170
1209 |     0.641427
1210 |     0.640678
1211 |     0.639895
1212 |     0.639148
1213 |     0.638247
1214 |     0.636725
1215 |     0.636166
1216 |     0.635749
1217 |     0.633287
1218 |     0.631706
1219 |     0.631330
1220 |     0.631145
1221 |     0.630361
1222 |     0.629449
1223 |     0.627495
1224 |     0.626398
1225 |     0.624864
1226 |     0.623195
1227 |     0.622484
1228 |     0.620204
1229 |     0.619257
1230 |     0.618031
1231 |     0.616679
1232 |     0.614482
1233 |     0.612315
1234 |     0.609651
1235 |     0.606166
1236 |     0.605721
1237 |     0.601163
1238 |     0.600258
1239 |     0.598812
1240 |     0.597943
1241 |     0.595561
1242 |     0.594310
1243 |     0.591526
1244 |     0.583090
1245 |     0.581623
1246 |     0.580843
1247 |     0.577479
1248 |     0.575503
1249 |     0.572902
1250 |     0.571719
1251 |     0.564517
1252 |     0.558561
1253 |     0.556191
1254 |     0.549372
1255 |     0.540657
1256 |     0.515586
1257 |     0.508704
1258 |    -0.000000
1259 | 


--------------------------------------------------------------------------------