├── images
│   ├── mfcc1.png
│   ├── english1.png
│   ├── mfcc_flow.png
│   ├── cnn_scores.png
│   └── accent.gmu_sample.png
├── dataframes
│   ├── df_japanese_male.csv
│   ├── df_italian_female.csv
│   ├── df_vietnamese_female.csv
│   ├── df_german_male.csv
│   ├── df_turkish_female.csv
│   ├── df_vietnamese_male.csv
│   ├── df_polish_female.csv
│   ├── df_polish_male.csv
│   ├── df_japanese_female.csv
│   ├── df_dutch_male.csv
│   ├── df_russian_male.csv
│   ├── df_italian_male.csv
│   ├── df_turkish_male.csv
│   ├── df_german_female.csv
│   ├── df_portuguese_female.csv
│   ├── df_korean_male.csv
│   ├── df_mandarin_male.csv
│   ├── df_russian_female.csv
│   ├── df_dutch_female.csv
│   ├── df_portuguese_male.csv
│   ├── df_korean_female.csv
│   ├── df_french_female.csv
│   ├── df_french_male.csv
│   ├── df_mandarin_female.csv
│   ├── df_arabic_female.csv
│   ├── df_arabic_male.csv
│   ├── df_spanish_female.csv
│   ├── df_spanish_male.csv
│   ├── df_usa_english_male.csv
│   ├── df_usa_english_female.csv
│   ├── df_usa_male.csv
│   ├── df_usa_female.csv
│   ├── df_english_female.csv
│   └── df_english_male.csv
├── code
│   ├── rnn_example.py
│   ├── conv_1d_model.py
│   ├── conv_1d_model_aws.py
│   ├── testing.py
│   └── mp3_getter.py
└── README.md
/images/mfcc1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dwww2012/Accent-Classifier/HEAD/images/mfcc1.png
--------------------------------------------------------------------------------
/images/english1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dwww2012/Accent-Classifier/HEAD/images/english1.png
--------------------------------------------------------------------------------
/images/mfcc_flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dwww2012/Accent-Classifier/HEAD/images/mfcc_flow.png
--------------------------------------------------------------------------------
/images/cnn_scores.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dwww2012/Accent-Classifier/HEAD/images/cnn_scores.png
--------------------------------------------------------------------------------
/images/accent.gmu_sample.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dwww2012/Accent-Classifier/HEAD/images/accent.gmu_sample.png
--------------------------------------------------------------------------------
/dataframes/df_japanese_male.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country
2 | 223,223,20.0,15.0,"tokyo, japan",japanese4,japanese,male,224,japan
3 | 226,226,18.0,12.0,"osaka, japan",japanese7,japanese,male,227,japan
4 | 485,485,25.0,12.0,"tokyo, japan",japanese8,japanese,male,486,japan
5 | 542,542,24.0,13.0,"yokohama, japan",japanese9,japanese,male,543,japan
6 | 1364,1364,36.0,12.0,"tokyo, japan",japanese12,japanese,male,1365,japan
7 | 1380,1380,28.0,11.0,"gunma, japan",japanese13,japanese,male,1381,japan
8 | 1520,1520,45.0,12.0,"osaka, japan",japanese15,japanese,male,1521,japan
9 | 1832,1832,21.0,14.0,"yokosuka, japan",japanese20,japanese,male,1833,japan
10 | 1941,1941,24.0,5.0,"kawasaki, japan",japanese23,japanese,male,1942,japan
11 |
--------------------------------------------------------------------------------
/dataframes/df_italian_female.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country
2 | 216,216,29.0,22.0,"carbonia, italy",italian1,italian,female,217,italy
3 | 219,219,47.0,11.0,"cremona, italy",italian4,italian,female,220,italy
4 | 472,472,32.0,15.0,"naples, italy",italian6,italian,female,473,italy
5 | 680,680,38.0,14.0,"bellinzona, switzerland",italian12,italian,female,681,switzerland
6 | 1176,1176,34.0,14.0,"cuneo, italy",italian23,italian,female,1177,italy
7 | 1405,1405,23.0,6.0,"milan, italy",italian24,italian,female,1406,italy
8 | 1542,1542,33.0,10.0,"milan, italy",italian27,italian,female,1543,italy
9 | 1685,1685,21.0,8.0,"trieste, italy",italian29,italian,female,1686,italy
10 | 1975,1975,59.0,23.0,"naples, italy",italian31,italian,female,1976,italy
11 | 2019,2019,78.0,50.0,"bitonto, italy",italian33,italian,female,2020,italy
12 |
--------------------------------------------------------------------------------
/dataframes/df_vietnamese_female.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country
2 | 396,396,28.0,9.0,"vinh long, vietnam",vietnamese3,vietnamese,female,397,vietnam
3 | 397,397,25.0,15.0,"hanoi, vietnam",vietnamese4,vietnamese,female,398,vietnam
4 | 400,400,29.0,18.0,"hue, vietnam",vietnamese7,vietnamese,female,401,vietnam
5 | 1191,1191,57.0,18.0,"quang nam, vietnam",vietnamese10,vietnamese,female,1192,vietnam
6 | 1528,1528,24.0,9.0,"can tho, vietnam",vietnamese12,vietnamese,female,1529,vietnam
7 | 1939,1939,33.0,11.0,"ho chi minh city, vietnam",vietnamese17,vietnamese,female,1940,vietnam
8 | 2009,2009,19.0,11.0,"saigon, vietnam",vietnamese18,vietnamese,female,2010,vietnam
9 | 2012,2012,18.0,4.0,"ho chi minh city, vietnam",vietnamese19,vietnamese,female,2013,vietnam
10 | 2041,2041,46.0,25.0,"haiphong, vietnam",vietnamese20,vietnamese,female,2042,vietnam
11 |
--------------------------------------------------------------------------------
/dataframes/df_german_male.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country
2 | 192,192,47.0,12.0,"halle, germany",german5,german,male,193,germany
3 | 193,193,28.0,8.0,"vienna, austria",german6,german,male,194,austria
4 | 500,500,20.0,11.0,"berlin, germany",german7,german,male,501,germany
5 | 544,544,25.0,10.0,"bernburg, germany",german8,german,male,545,germany
6 | 615,615,22.0,8.0,"wuppertal, germany",german9,german,male,616,germany
7 | 634,634,24.0,10.0,"darmstadt, germany",german10,german,male,635,germany
8 | 675,675,35.0,11.0,"innsbruck, austria",german15,german,male,676,austria
9 | 892,892,29.0,6.0,"bad aussee, austria",german18,german,male,893,austria
10 | 935,935,41.0,16.0,"chur, switzerland",german20,german,male,936,switzerland
11 | 1254,1254,24.0,11.0,"herdecke, germany",german23,german,male,1255,germany
12 | 1382,1382,27.0,10.0,"niedersachsen, germany",german24,german,male,1383,germany
13 | 1493,1493,23.0,10.0,"bielefeld, germany",german27,german,male,1494,germany
14 | 1876,1876,39.0,10.0,"stuttgart, germany",german33,german,male,1877,germany
15 | 1878,1878,28.0,11.0,"bochum, germany",german34,german,male,1879,germany
16 |
--------------------------------------------------------------------------------
/dataframes/df_turkish_female.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country
2 | 380,380,27.0,26.0,"bingol, turkey",turkish1,turkish,female,381,turkey
3 | 384,384,30.0,11.0,"istanbul, turkey",turkish5,turkish,female,385,turkey
4 | 633,633,24.0,23.0,"kocaeli, turkey",turkish11,turkish,female,634,turkey
5 | 685,685,18.0,3.0,"istanbul, turkey",turkish13,turkish,female,686,turkey
6 | 734,734,23.0,6.0,"istanbul, turkey",turkish14,turkish,female,735,turkey
7 | 1143,1143,19.0,9.0,"kocaeli, turkey",turkish20,turkish,female,1144,turkey
8 | 1209,1209,18.0,4.0,"istanbul, turkey",turkish21,turkish,female,1210,turkey
9 | 1290,1290,37.0,11.0,"adana, turkey",turkish23,turkish,female,1291,turkey
10 | 1729,1729,18.0,7.0,"istanbul, turkey",turkish27,turkish,female,1730,turkey
11 | 1771,1771,24.0,4.0,"istanbul, turkey",turkish28,turkish,female,1772,turkey
12 | 1807,1807,21.0,11.0,"ankara, turkey",turkish29,turkish,female,1808,turkey
13 | 1928,1928,24.0,11.0,"izmir, turkey",turkish30,turkish,female,1929,turkey
14 | 2047,2047,24.0,11.0,"izmir, turkey",turkish35,turkish,female,2048,turkey
15 | 2167,2167,26.0,12.0,"istanbul, turkey",turkish36,turkish,female,2168,turkey
16 |
--------------------------------------------------------------------------------
/dataframes/df_vietnamese_male.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country
2 | 394,394,29.0,12.0,"can tho, vietnam",vietnamese1,vietnamese,male,395,vietnam
3 | 395,395,29.0,25.0,"ho chi minh city, vietnam",vietnamese2,vietnamese,male,396,vietnam
4 | 398,398,34.0,31.0,"cam ranh, vietnam",vietnamese5,vietnamese,male,399,vietnam
5 | 399,399,41.0,12.0,"ho chi minh city, vietnam",vietnamese6,vietnamese,male,400,vietnam
6 | 446,446,69.0,12.0,"ninh binh, vietnam",vietnamese8,vietnamese,male,447,vietnam
7 | 1133,1133,20.0,14.0,"ho chi minh city, vietnam",vietnamese9,vietnamese,male,1134,vietnam
8 | 1200,1200,67.0,15.0,"quang nam, vietnam",vietnamese11,vietnamese,male,1201,vietnam
9 | 1561,1561,23.0,6.0,"hanoi, vietnam",vietnamese13,vietnamese,male,1562,vietnam
10 | 1697,1697,51.0,20.0,"ho chi minh city, vietnam",vietnamese14,vietnamese,male,1698,vietnam
11 | 1808,1808,29.0,15.0,"ho chi minh city, vietnam",vietnamese15,vietnamese,male,1809,vietnam
12 | 1849,1849,52.0,17.0,"long xuyen, vietnam",vietnamese16,vietnamese,male,1850,vietnam
13 | 2098,2098,21.0,16.0,"hanoi, vietnam",vietnamese21,vietnamese,male,2099,vietnam
14 | 2150,2150,23.0,8.0,"bien hoa, vietnam",vietnamese22,vietnamese,male,2151,vietnam
15 |
--------------------------------------------------------------------------------
/dataframes/df_polish_female.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country
2 | 276,276,26.0,23.0,"janow, poland",polish1,polish,female,277,poland
3 | 277,277,26.0,25.0,"mielec, poland",polish2,polish,female,278,poland
4 | 278,278,31.0,12.0,"nowy sacz, poland",polish3,polish,female,279,poland
5 | 279,279,47.0,15.0,"krakow, poland",polish4,polish,female,280,poland
6 | 625,625,26.0,8.0,"torun, poland",polish6,polish,female,626,poland
7 | 840,840,23.0,15.0,"ostroleka, poland",polish9,polish,female,841,poland
8 | 841,841,30.0,15.0,"wolanow, poland",polish10,polish,female,842,poland
9 | 842,842,56.0,10.0,"warsaw, poland",polish11,polish,female,843,poland
10 | 1244,1244,25.0,9.0,"ketrzyn, poland",polish15,polish,female,1245,poland
11 | 1452,1452,33.0,13.0,"krakow, poland",polish17,polish,female,1453,poland
12 | 1624,1624,53.0,4.0,"gdansk, poland",polish20,polish,female,1625,poland
13 | 1653,1653,20.0,10.0,"koszalin, poland",polish24,polish,female,1654,poland
14 | 1744,1744,20.0,10.0,"koszalin, poland",polish28,polish,female,1745,poland
15 | 1748,1748,21.0,9.0,"koszalin, poland",polish30,polish,female,1749,poland
16 | 1917,1917,34.0,16.0,"trzebnica, poland",polish31,polish,female,1918,poland
17 | 2084,2084,48.0,12.0,"wroclaw, poland",polish34,polish,female,2085,poland
18 |
--------------------------------------------------------------------------------
/dataframes/df_polish_male.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country
2 | 481,481,22.0,9.0,"ozarow maz, poland",polish5,polish,male,482,poland
3 | 727,727,20.0,6.0,"przemet, poland",polish7,polish,male,728,poland
4 | 768,768,20.0,11.0,"otwock, poland",polish8,polish,male,769,poland
5 | 882,882,40.0,20.0,"warsaw, poland",polish12,polish,male,883,poland
6 | 957,957,22.0,14.0,"gryfino, western pomerania, poland",polish13,polish,male,958,poland
7 | 1170,1170,97.0,35.0,"warsaw, poland",polish14,polish,male,1171,poland
8 | 1277,1277,19.0,8.0,"koszalin, poland",polish16,polish,male,1278,poland
9 | 1482,1482,18.0,5.0,"bialystok, poland",polish18,polish,male,1483,poland
10 | 1548,1548,24.0,7.0,"belchatow, poland",polish19,polish,male,1549,poland
11 | 1650,1650,30.0,10.0,"kolobrzeg, poland",polish21,polish,male,1651,poland
12 | 1651,1651,19.0,11.0,"koszalin, poland",polish22,polish,male,1652,poland
13 | 1652,1652,21.0,8.0,"lobez, poland",polish23,polish,male,1653,poland
14 | 1670,1670,20.0,11.0,"koszalin, poland",polish25,polish,male,1671,poland
15 | 1736,1736,19.0,16.0,"bytow, poland",polish26,polish,male,1737,poland
16 | 1737,1737,19.0,6.0,"koszalin, poland",polish27,polish,male,1738,poland
17 | 1747,1747,20.0,11.0,"sianow, poland",polish29,polish,male,1748,poland
18 | 2061,2061,42.0,14.0,"torun, poland",polish32,polish,male,2062,poland
19 | 2063,2063,21.0,7.0,"nowa ruda, poland",polish33,polish,male,2064,poland
20 |
--------------------------------------------------------------------------------
/dataframes/df_japanese_female.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country
2 | 220,220,25.0,12.0,"tokyo, japan",japanese1,japanese,female,221,japan
3 | 221,221,27.0,12.0,"tokyo, japan",japanese2,japanese,female,222,japan
4 | 222,222,49.0,12.0,"kofu, yamanashi prefecture, japan",japanese3,japanese,female,223,japan
5 | 224,224,25.0,12.0,"chiba, japan",japanese5,japanese,female,225,japan
6 | 225,225,30.0,10.0,"kawasaki, japan",japanese6,japanese,female,226,japan
7 | 826,826,26.0,12.0,"hakodate, japan",japanese10,japanese,female,827,japan
8 | 1045,1045,29.0,13.0,"toyama, japan",japanese11,japanese,female,1046,japan
9 | 1381,1381,37.0,12.0,"akashi, japan",japanese14,japanese,female,1382,japan
10 | 1536,1536,25.0,3.0,"clark field, philippines",japanese16,japanese,female,1537,philippines
11 | 1609,1609,44.0,13.0,"nara, japan",japanese17,japanese,female,1610,japan
12 | 1625,1625,52.0,13.0,"kyoto, japan",japanese18,japanese,female,1626,japan
13 | 1683,1683,57.0,13.0,"naha, japan",japanese19,japanese,female,1684,japan
14 | 1846,1846,53.0,13.0,"tokyo, japan",japanese21,japanese,female,1847,japan
15 | 1880,1880,69.0,12.0,"ashiya, japan",japanese22,japanese,female,1881,japan
16 | 1947,1947,40.0,13.0,"tokyo, japan",japanese24,japanese,female,1948,japan
17 | 1986,1986,52.0,13.0,"kyoto, japan",japanese25,japanese,female,1987,japan
18 | 1990,1990,44.0,13.0,"yokosuka, japan",japanese26,japanese,female,1991,japan
19 | 2122,2122,21.0,13.0,"tokyo, japan",japanese27,japanese,female,2123,japan
20 |
--------------------------------------------------------------------------------
/dataframes/df_dutch_male.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country
2 | 56,56,38.0,13.0,"nijmegen, netherlands",dutch1,dutch,male,57,netherlands
3 | 57,57,41.0,14.0,"amsterdam, netherlands",dutch2,dutch,male,58,netherlands
4 | 536,536,19.0,6.0,"alkmaar, netherlands",dutch3,dutch,male,537,netherlands
5 | 742,742,18.0,10.0,"bilzen, flanders, belgium",dutch4,dutch,male,743,belgium
6 | 894,894,68.0,10.0,"amsterdam, netherlands",dutch7,dutch,male,895,netherlands
7 | 912,912,39.0,12.0,"essen, belgium",dutch10,dutch,male,913,belgium
8 | 928,928,41.0,14.0,"brugge, belgium",dutch11,dutch,male,929,belgium
9 | 955,955,36.0,10.0,"oosterbeek, netherlands",dutch12,dutch,male,956,netherlands
10 | 1222,1222,23.0,10.0,"nunspeet, netherlands",dutch15,dutch,male,1223,netherlands
11 | 1261,1261,23.0,14.0,"antwerp, belgium",dutch18,dutch,male,1262,belgium
12 | 1267,1267,23.0,13.0,"antwerp, belgium",dutch23,dutch,male,1268,belgium
13 | 1274,1274,22.0,8.0,"antwerp, belgium",dutch28,dutch,male,1275,belgium
14 | 1299,1299,29.0,7.0,"ede, netherlands",dutch29,dutch,male,1300,netherlands
15 | 1463,1463,21.0,14.0,"brasschaat, belgium",dutch32,dutch,male,1464,belgium
16 | 1483,1483,25.0,9.0,"dordrecht, netherlands",dutch40,dutch,male,1484,netherlands
17 | 1516,1516,41.0,13.0,"deventer, netherlands",dutch41,dutch,male,1517,netherlands
18 | 1550,1550,36.0,8.0,"schiedam, netherlands",dutch42,dutch,male,1551,netherlands
19 | 1704,1704,21.0,11.0,"rotterdam, netherlands",dutch43,dutch,male,1705,netherlands
20 | 1741,1741,22.0,2.0,"sint niklaas, belgium",dutch45,dutch,male,1742,belgium
21 |
--------------------------------------------------------------------------------
/dataframes/df_russian_male.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country
2 | 298,298,37.0,36.0,"nizhni novgorod, russia",russian1,russian,male,299,russia
3 | 300,300,54.0,13.0,"ola magadan, russia",russian11,russian,male,301,russia
4 | 307,307,66.0,12.0,"moscow, russia",russian8,russian,male,308,russia
5 | 308,308,23.0,7.0,"moscow, russia",russian9,russian,male,309,russia
6 | 459,459,62.0,53.0,"st. petersburg, russia",russian12,russian,male,460,russia
7 | 471,471,26.0,10.0,"moscow, russia",russian13,russian,male,472,russia
8 | 608,608,33.0,18.0,"pskov, russia",russian14,russian,male,609,russia
9 | 647,647,35.0,11.0,"minsk, belarus",russian16,russian,male,648,belarus
10 | 670,670,37.0,18.0,"pskov, russia",russian17,russian,male,671,russia
11 | 830,830,23.0,13.0,"komsomolsk-on-amur, russia",russian18,russian,male,831,russia
12 | 833,833,40.0,17.0,"zhezkazgan, kazakhstan",russian19,russian,male,834,kazakhstan
13 | 993,993,30.0,11.0,"riga, latvia",russian22,russian,male,994,latvia
14 | 1190,1190,21.0,14.0,"moscow, russia",russian27,russian,male,1191,russia
15 | 1253,1253,21.0,6.0,"chisinau, moldova",russian29,russian,male,1254,moldova
16 | 1278,1278,18.0,12.0,"stavropol, russia",russian30,russian,male,1279,russia
17 | 1310,1310,21.0,4.0,"tallinn, estonia",russian31,russian,male,1311,estonia
18 | 1406,1406,19.0,15.0,"vladivostok, russia",russian34,russian,male,1407,russia
19 | 1466,1466,27.0,16.0,"dresden, germany",russian35,russian,male,1467,germany
20 | 1517,1517,25.0,20.0,"moscow, russia",russian36,russian,male,1518,russia
21 | 1522,1522,31.0,10.0,"st. petersburg, russia",russian37,russian,male,1523,russia
22 | 1707,1707,84.0,18.0,"gomel, belarus",russian38,russian,male,1708,belarus
23 |
--------------------------------------------------------------------------------
/dataframes/df_italian_male.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country
2 | 217,217,46.0,18.0,"caltanissetta, sicily, italy",italian2,italian,male,218,italy
3 | 218,218,55.0,21.0,"florence, italy",italian3,italian,male,219,italy
4 | 439,439,52.0,25.0,"vicenza, veneto, italy",italian5,italian,male,440,italy
5 | 552,552,24.0,8.0,"naples, italy",italian7,italian,male,553,italy
6 | 588,588,43.0,10.0,"bari, italy",italian8,italian,male,589,italy
7 | 618,618,18.0,8.0,"turin, italy",italian9,italian,male,619,italy
8 | 626,626,48.0,12.0,"palermo, italy",italian10,italian,male,627,italy
9 | 640,640,30.0,18.0,"cagliari, italy",italian11,italian,male,641,italy
10 | 732,732,20.0,8.0,"enna, italy",italian13,italian,male,733,italy
11 | 760,760,20.0,8.0,"cosenza, italy",italian14,italian,male,761,italy
12 | 838,838,48.0,15.0,"forli, italy",italian15,italian,male,839,italy
13 | 941,941,40.0,14.0,"mantua, italy",italian16,italian,male,942,italy
14 | 994,994,28.0,14.0,"turin, italy",italian17,italian,male,995,italy
15 | 1042,1042,20.0,8.0,"grugliasco, italy",italian18,italian,male,1043,italy
16 | 1113,1113,19.0,11.0,"rome, italy",italian19,italian,male,1114,italy
17 | 1122,1122,32.0,15.0,"naples, italy",italian20,italian,male,1123,italy
18 | 1123,1123,42.0,20.0,"trento, italy",italian21,italian,male,1124,italy
19 | 1174,1174,23.0,6.0,"teramo, italy",italian22,italian,male,1175,italy
20 | 1321,1321,40.0,3.5,"wiesbaden, germany",italian25,italian,male,1322,germany
21 | 1488,1488,49.0,11.0,"siracusa, sicily, italy",italian26,italian,male,1489,italy
22 | 1678,1678,25.0,6.0,"rome, italy",italian28,italian,male,1679,italy
23 | 1740,1740,23.0,11.0,"bologna, italy",italian30,italian,male,1741,italy
24 | 1983,1983,49.0,14.0,"sardinia, italy",italian32,italian,male,1984,italy
25 |
--------------------------------------------------------------------------------
/dataframes/df_turkish_male.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country
2 | 381,381,24.0,16.0,"ankara, turkey",turkish2,turkish,male,382,turkey
3 | 382,382,27.0,12.0,"adana, turkey",turkish3,turkish,male,383,turkey
4 | 383,383,20.0,14.0,"kayseri, turkey",turkish4,turkish,male,384,turkey
5 | 385,385,19.0,14.0,"istanbul, turkey",turkish6,turkish,male,386,turkey
6 | 386,386,19.0,13.0,"izmir, turkey",turkish7,turkish,male,387,turkey
7 | 477,477,37.0,17.0,"sivas, turkey",turkish8,turkish,male,478,turkey
8 | 531,531,25.0,11.0,"istanbul, turkey",turkish9,turkish,male,532,turkey
9 | 577,577,27.0,11.0,"ankara, turkey",turkish10,turkish,male,578,turkey
10 | 671,671,25.0,20.0,"giresun, turkey",turkish12,turkish,male,672,turkey
11 | 735,735,18.0,10.0,"ankara, turkey",turkish15,turkish,male,736,turkey
12 | 744,744,25.0,12.0,"kars, turkey",turkish16,turkish,male,745,turkey
13 | 762,762,31.0,6.0,"ankara, turkey",turkish17,turkish,male,763,turkey
14 | 925,925,27.0,9.0,"stuttgart, germany",turkish18,turkish,male,926,germany
15 | 970,970,25.0,18.0,"istanbul, turkey",turkish19,turkish,male,971,turkey
16 | 1281,1281,23.0,6.0,"istanbul, turkey",turkish22,turkish,male,1282,turkey
17 | 1328,1328,31.0,12.0,"istanbul, turkey",turkish24,turkish,male,1329,turkey
18 | 1569,1569,32.0,26.0,"kars, turkey",turkish25,turkish,male,1570,turkey
19 | 1703,1703,45.0,22.0,"sivas, turkey",turkish26,turkish,male,1704,turkey
20 | 2031,2031,24.0,11.0,"diyarbakir, turkey",turkish31,turkish,male,2032,turkey
21 | 2032,2032,24.0,11.0,"diyarbakir, turkey",turkish32,turkish,male,2033,turkey
22 | 2033,2033,24.0,11.0,"diyarbakir, turkey",turkish33,turkish,male,2034,turkey
23 | 2034,2034,24.0,11.0,"diyarbakir, turkey",turkish34,turkish,male,2035,turkey
24 | 2173,2173,19.0,17.0,"panjakent, tajikistan",turkish37,turkish,male,2174,tajikistan
25 |
--------------------------------------------------------------------------------
/dataframes/df_german_female.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country
2 | 188,188,48.0,12.0,"dusseldorf, germany",german1,german,female,189,germany
3 | 189,189,29.0,12.0,"bemberg, germany",german2,german,female,190,germany
4 | 190,190,19.0,12.0,"meissen, germany",german3,german,female,191,germany
5 | 191,191,19.0,11.0,"stuttgart, germany",german4,german,female,192,germany
6 | 639,639,20.0,11.0,"frankfurt, germany",german11,german,female,640,germany
7 | 650,650,29.0,14.0,"feldkirch, austria",german12,german,female,651,austria
8 | 651,651,54.0,13.0,"eschen, liechtenstein",german13,german,female,652,liechtenstein
9 | 674,674,30.0,10.0,"vienna, austria",german14,german,female,675,austria
10 | 880,880,31.0,11.0,"bremen, germany",german16,german,female,881,germany
11 | 881,881,53.0,10.0,"frankfurt, germany",german17,german,female,882,germany
12 | 927,927,19.0,11.0,"offenbach, germany",german19,german,female,928,germany
13 | 1063,1063,20.0,10.0,"villach, austria",german21,german,female,1064,austria
14 | 1091,1091,21.0,12.0,"elsterwerda, germany",german22,german,female,1092,germany
15 | 1442,1442,21.0,11.0,"geislingen, germany",german25,german,female,1443,germany
16 | 1451,1451,77.0,24.0,"stuttgart, germany",german26,german,female,1452,germany
17 | 1524,1524,19.0,10.0,"vienna, austria",german28,german,female,1525,austria
18 | 1618,1618,53.0,12.0,"coburg, germany",german29,german,female,1619,germany
19 | 1680,1680,32.0,8.0,"radstadt, austria",german30,german,female,1681,austria
20 | 1684,1684,21.0,10.0,"datteln, germany",german31,german,female,1685,germany
21 | 1706,1706,29.0,12.0,"niedersachsen, germany",german32,german,female,1707,germany
22 | 1971,1971,51.0,11.0,"ingolstadt, germany",german35,german,female,1972,germany
23 | 1993,1993,18.0,14.0,"hamburg, germany",german36,german,female,1994,germany
24 |
--------------------------------------------------------------------------------
/dataframes/df_portuguese_female.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country
2 | 280,280,37.0,16.0,"bahia, brazil",portuguese1,portuguese,female,281,brazil
3 | 282,282,18.0,10.0,"brasilia, brazil",portuguese2,portuguese,female,283,brazil
4 | 283,283,18.0,15.0,"blumenau, brazil",portuguese3,portuguese,female,284,brazil
5 | 494,494,40.0,7.0,"sao paulo, brazil",portuguese11,portuguese,female,495,brazil
6 | 811,811,50.0,12.0,"almada, portugal",portuguese18,portuguese,female,812,portugal
7 | 932,932,43.0,17.0,"luanda, angola",portuguese22,portuguese,female,933,angola
8 | 937,937,29.0,10.0,"curitiba, brazil",portuguese23,portuguese,female,938,brazil
9 | 959,959,22.0,11.0,"santos, brazil",portuguese24,portuguese,female,960,brazil
10 | 971,971,20.0,18.0,"rio de janeiro, brazil",portuguese25,portuguese,female,972,brazil
11 | 1108,1108,25.0,10.0,"campo grande, brazil",portuguese26,portuguese,female,1109,brazil
12 | 1127,1127,33.0,8.0,"sao paulo, brazil",portuguese27,portuguese,female,1128,brazil
13 | 1186,1186,36.0,10.0,"sao paulo, brazil",portuguese28,portuguese,female,1187,brazil
14 | 1284,1284,26.0,24.0,"juiz de fora, brazil",portuguese30,portuguese,female,1285,brazil
15 | 1388,1388,22.0,9.0,"sao paulo, brazil",portuguese35,portuguese,female,1389,brazil
16 | 1437,1437,39.0,15.0,"uberlandia, brazil",portuguese36,portuguese,female,1438,brazil
17 | 1453,1453,43.0,26.0,"sao paulo, brazil",portuguese37,portuguese,female,1454,brazil
18 | 1552,1552,26.0,8.0,"sao paulo, brazil",portuguese39,portuguese,female,1553,brazil
19 | 1921,1921,32.0,8.0,"coronel fabriciano, minas gerais, brazil",portuguese43,portuguese,female,1922,brazil
20 | 1929,1929,38.0,24.0,"rio de janeiro, brazil",portuguese44,portuguese,female,1930,brazil
21 | 1940,1940,19.0,7.0,"salvador, brazil",portuguese45,portuguese,female,1941,brazil
22 | 1988,1988,22.0,12.0,"rosario do sul, brazil",portuguese47,portuguese,female,1989,brazil
23 |
--------------------------------------------------------------------------------
/dataframes/df_korean_male.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country
2 | 237,237,29.0,13.0,"ui jong bu, south korea",korean1,korean,male,238,south korea
3 | 243,243,32.0,10.0,"seoul, south korea",korean7,korean,male,244,south korea
4 | 473,473,34.0,12.0,"taejon, south korea",korean9,korean,male,474,south korea
5 | 482,482,34.0,13.0,"kwangju, south korea",korean10,korean,male,483,south korea
6 | 558,558,26.0,14.0,"seoul, south korea",korean11,korean,male,559,south korea
7 | 607,607,31.0,31.0,"sokcho, south korea",korean12,korean,male,608,south korea
8 | 1004,1004,42.0,40.0,"pusan, south korea",korean15,korean,male,1005,south korea
9 | 1006,1006,21.0,8.0,"cheju-do, south korea",korean17,korean,male,1007,south korea
10 | 1116,1116,25.0,7.0,"seoul, south korea",korean18,korean,male,1117,south korea
11 | 1141,1141,21.0,12.0,"ulsan, south korea",korean21,korean,male,1142,south korea
12 | 1369,1369,42.0,13.0,"seoul, south korea",korean24,korean,male,1370,south korea
13 | 1506,1506,23.0,21.0,"seoul, south korea",korean26,korean,male,1507,south korea
14 | 1623,1623,51.0,14.0,"haenam, south korea",korean27,korean,male,1624,south korea
15 | 1814,1814,48.0,12.0,"kwangju, south korea",korean35,korean,male,1815,south korea
16 | 1847,1847,25.0,10.0,"inchon, south korea",korean38,korean,male,1848,south korea
17 | 1861,1861,55.0,14.0,"pusan, south korea",korean39,korean,male,1862,south korea
18 | 1949,1949,26.0,3.0,"bayside, new york, usa",korean42,korean,male,1950,usa
19 | 1951,1951,20.0,13.0,"taejon, south korea",korean43,korean,male,1952,south korea
20 | 1970,1970,49.0,14.0,"inchon, south korea",korean44,korean,male,1971,south korea
21 | 1981,1981,28.0,13.0,"inchon, south korea",korean45,korean,male,1982,south korea
22 | 2057,2057,23.0,10.0,"seoul, south korea",korean47,korean,male,2058,south korea
23 | 2074,2074,32.0,27.0,"seoul, south korea",korean48,korean,male,2075,south korea
24 | 2133,2133,25.0,16.0,"seoul, south korea",korean51,korean,male,2134,south korea
25 | 2157,2157,40.0,12.0,"seoul, south korea",korean52,korean,male,2158,south korea
26 |
--------------------------------------------------------------------------------
/dataframes/df_mandarin_male.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country
2 | 257,257,43.0,10.0,"jilin city, jilin, china",mandarin3,mandarin,male,258,china
3 | 261,261,22.0,5.0,"berkeley, california, usa",mandarin7,mandarin,male,262,usa
4 | 429,429,29.0,12.0,"jingmen, hubei, china",mandarin8,mandarin,male,430,china
5 | 450,450,38.0,12.0,"shanghai, china",mandarin9,mandarin,male,451,china
6 | 490,490,19.0,3.0,"beijing, china",mandarin10,mandarin,male,491,china
7 | 622,622,23.0,1.0,"singapore, singapore",mandarin12,mandarin,male,623,singapore
8 | 684,684,29.0,13.0,"nantou, taiwan",mandarin13,mandarin,male,685,taiwan
9 | 718,718,49.0,20.0,"dalian, liaoning, china",mandarin14,mandarin,male,719,china
10 | 749,749,32.0,10.0,"taipei, taiwan",mandarin16,mandarin,male,750,taiwan
11 | 795,795,26.0,13.0,"wuhan, hubei, china",mandarin17,mandarin,male,796,china
12 | 915,915,27.0,13.0,"qinzhou, guangxi, china",mandarin19,mandarin,male,916,china
13 | 1153,1153,18.0,15.0,"chengdu, sichuan, china",mandarin27,mandarin,male,1154,china
14 | 1446,1446,45.0,15.0,"shanghai, china",mandarin28,mandarin,male,1447,china
15 | 1489,1489,24.0,4.0,"lanzhou, gansu, china",mandarin29,mandarin,male,1490,china
16 | 1490,1490,27.0,13.0,"taipei, taiwan",mandarin30,mandarin,male,1491,taiwan
17 | 1540,1540,25.0,6.0,"beijing, china",mandarin33,mandarin,male,1541,china
18 | 1613,1613,28.0,12.0,"chengdu, sichuan, china",mandarin40,mandarin,male,1614,china
19 | 1629,1629,21.0,16.0,"hangzhou, zhejiang, china",mandarin44,mandarin,male,1630,china
20 | 1644,1644,37.0,12.0,"pingdingshan, henan, china",mandarin48,mandarin,male,1645,china
21 | 1735,1735,21.0,10.0,"shanghai, china",mandarin49,mandarin,male,1736,china
22 | 1787,1787,25.0,9.0,"hsinchu, taiwan",mandarin50,mandarin,male,1788,taiwan
23 | 1791,1791,25.0,10.0,"tianjin, hebei, china",mandarin51,mandarin,male,1792,china
24 | 1989,1989,20.0,12.0,"shanghai, china",mandarin57,mandarin,male,1990,china
25 | 1999,1999,23.0,7.0,"superior, colorado, usa",mandarin58,mandarin,male,2000,usa
26 | 2116,2116,24.0,7.0,"shanghai, china",mandarin62,mandarin,male,2117,china
27 | 2118,2118,27.0,12.0,"jiaozuo, henan, china",mandarin63,mandarin,male,2119,china
28 |
--------------------------------------------------------------------------------
/code/rnn_example.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import numpy as np
3 |
4 | from keras.optimizers import SGD
5 |
6 |
7 | np.random.seed(1337) # for reproducibility
8 | from keras.preprocessing import sequence
9 | from keras.utils import np_utils
10 | from keras.models import Sequential
11 | from keras.layers.core import Dense, Dropout, Activation
12 | from keras.layers.recurrent import LSTM
13 | from sklearn.cross_validation import train_test_split
14 | from sklearn.metrics import classification_report
15 |
16 | batch_size = 25
17 | hidden_units = 10
18 | nb_classes = 3
19 | print('Loading data...')
20 | X = np.load('top_3_100_split_mfcc.npy')
21 | y = np.load('top_3_100_split_y.npy')
22 |
23 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15)
24 |
25 | print(len(X_train), 'train sequences')
26 | print(len(X_test), 'test sequences')
27 | print('X_train shape:', X_train.shape)
28 | print('X_test shape:', X_test.shape)
29 | print('y_train shape:', y_train.shape)
30 | print('y_test shape:', y_test.shape)
31 | print('Build model...')
32 |
33 | Y_train = np_utils.to_categorical(y_train, nb_classes)
34 | Y_test = np_utils.to_categorical(y_test, nb_classes)
35 |
36 | model = Sequential()
37 |
38 | #batch_input_shape= (batch_size, X_train.shape[1], X_train.shape[2])
39 |
40 | # note that it is necessary to pass in 3d batch_input_shape if stateful=True
41 | model.add(LSTM(64, return_sequences=True, stateful=False,
42 | batch_input_shape= (batch_size, X_train.shape[1], X_train.shape[2])))
43 | model.add(LSTM(64, return_sequences=True, stateful=False))
44 | model.add(LSTM(64, stateful=False))
45 |
46 |
47 | # add dropout to control for overfitting
48 | model.add(Dropout(.25))
49 |
50 | # squash output onto number of classes in probability space
51 | model.add(Dense(nb_classes, activation='softmax'))
52 |
53 |
54 | model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"])
55 |
56 | print("Train...")
57 | model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=5, validation_data=(X_test, Y_test))
58 |
59 | y_pred=model.predict_classes(X_test, batch_size=batch_size)
60 | print(classification_report(y_test, y_pred))
61 |
--------------------------------------------------------------------------------
/dataframes/df_russian_female.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country
2 | 299,299,20.0,5.0,"moscow, russia",russian10,russian,female,300,russia
3 | 301,301,50.0,10.0,"izmail, ukraine",russian2,russian,female,302,ukraine
4 | 302,302,35.0,6.0,"zaporizhzhya, ukraine",russian3,russian,female,303,ukraine
5 | 303,303,68.0,38.0,"moscow, russia",russian4,russian,female,304,russia
6 | 304,304,25.0,15.0,"khabarovsk, russia",russian5,russian,female,305,russia
7 | 305,305,26.0,11.0,"moscow, russia",russian6,russian,female,306,russia
8 | 306,306,54.0,13.0,"kiev, ukraine",russian7,russian,female,307,ukraine
9 | 646,646,34.0,12.0,"ayaguz, kazakhstan",russian15,russian,female,647,kazakhstan
10 | 910,910,28.0,17.0,"kostanai, kazakhstan",russian20,russian,female,911,kazakhstan
11 | 953,953,46.0,10.0,"moscow, russia",russian21,russian,female,954,russia
12 | 1056,1056,21.0,5.0,"zaporizhzhya, ukraine",russian23,russian,female,1057,ukraine
13 | 1072,1072,33.0,19.0,"sochi, russia",russian24,russian,female,1073,russia
14 | 1095,1095,58.0,41.0,"orhei, moldova",russian25,russian,female,1096,moldova
15 | 1096,1096,23.0,17.0,"chisinau, moldova",russian26,russian,female,1097,moldova
16 | 1197,1197,25.0,11.0,"minsk, belarus",russian28,russian,female,1198,belarus
17 | 1313,1313,31.0,14.0,"kiev, ukraine",russian32,russian,female,1314,ukraine
18 | 1316,1316,24.0,17.0,"karaganda, kazakhstan",russian33,russian,female,1317,kazakhstan
19 | 1749,1749,25.0,5.0,"brooklyn, new york, usa",russian39,russian,female,1750,usa
20 | 1773,1773,23.0,8.0,"bishkek, kyrgyzstan",russian40,russian,female,1774,kyrgyzstan
21 | 1843,1843,30.0,10.0,"nizhni novgorod, russia",russian41,russian,female,1844,russia
22 | 1923,1923,24.0,4.0,"moscow, russia",russian42,russian,female,1924,russia
23 | 1931,1931,27.0,8.0,"st. petersburg, russia",russian43,russian,female,1932,russia
24 | 1946,1946,29.0,6.0,"penza, russia",russian44,russian,female,1947,russia
25 | 2026,2026,68.0,14.0,"st. petersburg, russia",russian45,russian,female,2027,russia
26 | 2104,2104,24.0,7.0,"perm, russia",russian46,russian,female,2105,russia
27 | 2106,2106,26.0,6.0,"saransk, russia",russian47,russian,female,2107,russia
28 | 2136,2136,29.0,6.0,"fergana, uzbekistan",russian48,russian,female,2137,uzbekistan
29 |
--------------------------------------------------------------------------------
/dataframes/df_dutch_female.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country
2 | 769,769,54.0,13.0,"rotterdam, netherlands",dutch5,dutch,female,770,netherlands
3 | 846,846,46.0,10.0,"brussels, belgium",dutch6,dutch,female,847,belgium
4 | 895,895,35.0,12.0,"alphen aan de ryn, netherlands",dutch8,dutch,female,896,netherlands
5 | 896,896,68.0,12.0,"almelo, netherlands",dutch9,dutch,female,897,netherlands
6 | 1002,1002,33.0,10.0,"zwolle, netherlands",dutch13,dutch,female,1003,netherlands
7 | 1003,1003,35.0,12.0,"zwolle, netherlands",dutch14,dutch,female,1004,netherlands
8 | 1259,1259,23.0,12.0,"antwerp, belgium",dutch16,dutch,female,1260,belgium
9 | 1260,1260,21.0,12.0,"wilrijk, belgium",dutch17,dutch,female,1261,belgium
10 | 1262,1262,22.0,11.0,"heist-op-den-berg, belgium",dutch19,dutch,female,1263,belgium
11 | 1263,1263,21.0,13.0,"vlaams-brabant, belgium",dutch20,dutch,female,1264,belgium
12 | 1264,1264,21.0,13.0,"diest, belgium",dutch21,dutch,female,1265,belgium
13 | 1265,1265,23.0,13.0,"essen, belgium",dutch22,dutch,female,1266,belgium
14 | 1268,1268,23.0,14.0,"seoul, south korea",dutch24,dutch,female,1269,south korea
15 | 1269,1269,23.0,13.0,"wilrijk, belgium",dutch25,dutch,female,1270,belgium
16 | 1270,1270,23.0,13.0,"turnhout, belgium",dutch26,dutch,female,1271,belgium
17 | 1273,1273,21.0,13.0,"antwerp, belgium",dutch27,dutch,female,1274,belgium
18 | 1461,1461,21.0,14.0,"sint-niklaas, belgium",dutch30,dutch,female,1462,belgium
19 | 1462,1462,23.0,13.0,"antwerp, belgium",dutch31,dutch,female,1463,belgium
20 | 1465,1465,22.0,9.0,"mechelen, belgium",dutch33,dutch,female,1466,belgium
21 | 1467,1467,22.0,14.0,"bornem, belgium",dutch34,dutch,female,1468,belgium
22 | 1472,1472,21.0,13.0,"mortsel, belgium",dutch35,dutch,female,1473,belgium
23 | 1475,1475,21.0,12.0,"turnhout, belgium",dutch36,dutch,female,1476,belgium
24 | 1478,1478,21.0,11.0,"antwerp, belgium",dutch37,dutch,female,1479,belgium
25 | 1480,1480,21.0,12.0,"beveren, belgium",dutch38,dutch,female,1481,belgium
26 | 1481,1481,22.0,10.0,"hardinxveld-giessendam, netherlands",dutch39,dutch,female,1482,netherlands
27 | 1739,1739,21.0,12.0,"antwerp, belgium",dutch44,dutch,female,1740,belgium
28 | 1742,1742,22.0,12.0,"hulshout, belgium",dutch46,dutch,female,1743,belgium
29 | 2114,2114,37.0,11.0,"gouda, netherlands",dutch47,dutch,female,2115,netherlands
30 |
--------------------------------------------------------------------------------
/dataframes/df_portuguese_male.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country
2 | 281,281,21.0,13.0,"novo hamburgo, rio grande do sul, brazil",portuguese10,portuguese,male,282,brazil
3 | 284,284,20.0,15.0,"cabinda, angola",portuguese4,portuguese,male,285,angola
4 | 285,285,31.0,13.0,"lubango, angola",portuguese5,portuguese,male,286,angola
5 | 286,286,44.0,15.0,"sao paulo, brazil",portuguese6,portuguese,male,287,brazil
6 | 287,287,40.0,11.0,"vitoria, brazil",portuguese7,portuguese,male,288,brazil
7 | 288,288,26.0,7.0,"fortaleza, brazil",portuguese8,portuguese,male,289,brazil
8 | 289,289,18.0,14.0,"sao paulo, brazil",portuguese9,portuguese,male,290,brazil
9 | 512,512,36.0,12.0,"lisbon, portugal",portuguese12,portuguese,male,513,portugal
10 | 519,519,18.0,9.0,"santa maria, rio grande do sul, brazil",portuguese13,portuguese,male,520,brazil
11 | 557,557,25.0,8.0,"sao paulo, brazil",portuguese14,portuguese,male,558,brazil
12 | 614,614,54.0,9.0,"sao paulo, brazil",portuguese15,portuguese,male,615,brazil
13 | 631,631,25.0,15.0,"porto alegre, brazil",portuguese16,portuguese,male,632,brazil
14 | 805,805,18.0,15.0,"salinas, brazil",portuguese17,portuguese,male,806,brazil
15 | 828,828,32.0,4.0,"florianopolis, brazil",portuguese19,portuguese,male,829,brazil
16 | 832,832,21.0,10.0,"campinas, brazil",portuguese20,portuguese,male,833,brazil
17 | 890,890,23.0,17.0,"sao paulo, brazil",portuguese21,portuguese,male,891,brazil
18 | 1283,1283,28.0,8.0,"vitoria, brazil",portuguese29,portuguese,male,1284,brazil
19 | 1285,1285,29.0,16.0,"belo horizonte, brazil",portuguese31,portuguese,male,1286,brazil
20 | 1304,1304,29.0,12.0,"volta redonda, brazil",portuguese32,portuguese,male,1305,brazil
21 | 1315,1315,38.0,28.0,"brasilia, brazil",portuguese33,portuguese,male,1316,brazil
22 | 1375,1375,18.0,14.0,"porto alegre, brazil",portuguese34,portuguese,male,1376,brazil
23 | 1469,1469,25.0,15.0,"rio de janeiro, brazil",portuguese38,portuguese,male,1470,brazil
24 | 1772,1772,23.0,0.0,"london, england, uk",portuguese40,portuguese,male,1773,uk
25 | 1848,1848,22.0,10.0,"mexico city, mexico",portuguese41,portuguese,male,1849,mexico
26 | 1882,1882,24.0,21.0,"luanda, angola",portuguese42,portuguese,male,1883,angola
27 | 1964,1964,65.0,11.0,"cascais, portugal",portuguese46,portuguese,male,1965,portugal
28 | 2119,2119,31.0,26.0,"sao paulo, brazil",portuguese48,portuguese,male,2120,brazil
29 |
--------------------------------------------------------------------------------
/dataframes/df_korean_female.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country
2 | 238,238,23.0,14.0,"taegu, south korea",korean2,korean,female,239,south korea
3 | 239,239,19.0,17.0,"seoul, south korea",korean3,korean,female,240,south korea
4 | 240,240,29.0,15.0,"seoul, south korea",korean4,korean,female,241,south korea
5 | 241,241,50.0,12.0,"seoul, south korea",korean5,korean,female,242,south korea
6 | 242,242,21.0,12.0,"sunchun, south korea",korean6,korean,female,243,south korea
7 | 451,451,39.0,13.0,"seoul, south korea",korean8,korean,female,452,south korea
8 | 758,758,50.0,12.0,"pusan, south korea",korean13,korean,female,759,south korea
9 | 891,891,22.0,8.0,"kota kinabalu, malaysia",korean14,korean,female,892,malaysia
10 | 1005,1005,30.0,13.0,"nonsan, south korea",korean16,korean,female,1006,south korea
11 | 1117,1117,21.0,14.0,"seoul, south korea",korean19,korean,female,1118,south korea
12 | 1125,1125,58.0,13.0,"inchon, south korea",korean20,korean,female,1126,south korea
13 | 1144,1144,19.0,8.0,"inchon, south korea",korean22,korean,female,1145,south korea
14 | 1195,1195,49.0,12.0,"seoul, south korea",korean23,korean,female,1196,south korea
15 | 1385,1385,34.0,12.0,"anseong, kyong gi, south korea",korean25,korean,female,1386,south korea
16 | 1662,1662,62.0,13.0,"seoul, south korea",korean28,korean,female,1663,south korea
17 | 1686,1686,19.0,8.0,"masan, south korea",korean29,korean,female,1687,south korea
18 | 1689,1689,49.0,14.0,"seoul, south korea",korean30,korean,female,1690,south korea
19 | 1695,1695,18.0,7.0,"seoul, south korea",korean31,korean,female,1696,south korea
20 | 1714,1714,35.0,28.0,"seoul, south korea",korean32,korean,female,1715,south korea
21 | 1721,1721,30.0,13.0,"seongnam, south korea",korean33,korean,female,1722,south korea
22 | 1813,1813,20.0,11.0,"seoul, south korea",korean34,korean,female,1814,south korea
23 | 1837,1837,51.0,12.0,"taegu, south korea",korean36,korean,female,1838,south korea
24 | 1842,1842,27.0,14.0,"seoul, south korea",korean37,korean,female,1843,south korea
25 | 1942,1942,21.0,17.0,"seoul, south korea",korean40,korean,female,1943,south korea
26 | 1945,1945,50.0,12.0,"seoul, south korea",korean41,korean,female,1946,south korea
27 | 2014,2014,21.0,21.0,"seoul, south korea",korean46,korean,female,2015,south korea
28 | 2097,2097,22.0,7.0,"an yang, south korea",korean49,korean,female,2098,south korea
29 | 2132,2132,57.0,13.0,"taejon, south korea",korean50,korean,female,2133,south korea
30 |
--------------------------------------------------------------------------------
/dataframes/df_french_female.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country
2 | 178,178,20.0,12.0,"st. laurent d'onay, france",french1,french,female,179,france
3 | 180,180,22.0,11.0,"nice, france",french3,french,female,181,france
4 | 181,181,31.0,14.0,"abidjan, ivory coast",french4,french,female,182,ivory coast
5 | 182,182,36.0,11.0,"douala, cameroon",french5,french,female,183,cameroon
6 | 183,183,26.0,13.0,"booue, gabon",french6,french,female,184,gabon
7 | 523,523,19.0,9.0,"montreal, quebec, canada",french12,french,female,524,canada
8 | 628,628,23.0,14.0,"paris, france",french14,french,female,629,france
9 | 657,657,19.0,7.0,"montreal, quebec, canada",french16,french,female,658,canada
10 | 821,821,78.0,16.0,"ghent, belgium",french22,french,female,822,belgium
11 | 822,822,76.0,16.0,"ghent, belgium",french23,french,female,823,belgium
12 | 916,916,27.0,11.0,"montreal, quebec, canada",french26,french,female,917,canada
13 | 931,931,38.0,12.0,"algiers, algeria",french27,french,female,932,algeria
14 | 956,956,35.0,11.0,"port-au-prince, haiti",french28,french,female,957,haiti
15 | 1020,1020,54.0,10.0,"cannes, france",french29,french,female,1021,france
16 | 1303,1303,32.0,10.0,"reims, france",french36,french,female,1304,france
17 | 1428,1428,44.0,13.5,"rouen, france",french40,french,female,1429,france
18 | 1508,1508,22.0,20.0,"kinshasa, democratic republic of congo",french42,french,female,1509,democratic republic of congo
19 | 1544,1544,20.0,12.0,"la chaux-de-fonds, switzerland",french44,french,female,1545,switzerland
20 | 1661,1661,66.0,12.0,"landerneau, brittany, france",french47,french,female,1662,france
21 | 1700,1700,19.0,3.0,"douala, cameroon",french48,french,female,1701,cameroon
22 | 1750,1750,67.0,14.0,"paris, france",french50,french,female,1751,france
23 | 1812,1812,56.0,11.0,"lisbon, portugal",french52,french,female,1813,portugal
24 | 1829,1829,19.0,10.0,"creteil, france",french53,french,female,1830,france
25 | 1845,1845,25.0,8.0,"vichy, france",french54,french,female,1846,france
26 | 1888,1888,62.0,12.0,"settat, morocco",french55,french,female,1889,morocco
27 | 1897,1897,21.0,5.0,"douala, cameroon",french56,french,female,1898,cameroon
28 | 1936,1936,25.0,18.0,"douala, cameroon",french58,french,female,1937,cameroon
29 | 1937,1937,20.0,5.0,"paris, france",french59,french,female,1938,france
30 | 1944,1944,20.0,11.0,"paris, france",french60,french,female,1945,france
31 | 1997,1997,41.0,10.0,"rawalpindi, pakistan",french61,french,female,1998,pakistan
32 | 2029,2029,21.0,20.0,"paris, france",french62,french,female,2030,france
33 |
--------------------------------------------------------------------------------
/dataframes/df_french_male.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country
2 | 179,179,19.0,14.0,"tunis, tunisia",french2,french,male,180,tunisia
3 | 184,184,18.0,5.0,"rimouski, quebec, canada",french7,french,male,185,canada
4 | 185,185,66.0,16.0,"ghent, belgium",french8,french,male,186,belgium
5 | 411,411,21.0,11.0,"lamentin, martinique",french9,french,male,412,martinique
6 | 499,499,31.0,10.0,"bordeaux, france",french10,french,male,500,france
7 | 513,513,31.0,11.0,"limoges, france",french11,french,male,514,france
8 | 627,627,19.0,12.0,"bordeaux, france",french13,french,male,628,france
9 | 632,632,32.0,12.0,"sens, france",french15,french,male,633,france
10 | 682,682,39.0,14.0,"fribourg, switzerland",french17,french,male,683,switzerland
11 | 740,740,22.0,6.0,"washington, dc, usa",french18,french,male,741,usa
12 | 754,754,39.0,12.0,"grenoble, france",french19,french,male,755,france
13 | 803,803,23.0,12.0,"alma, quebec, canada",french20,french,male,804,canada
14 | 810,810,20.0,9.0,"amiens, france",french21,french,male,811,france
15 | 831,831,47.0,13.0,"mostaganem, algeria",french24,french,male,832,algeria
16 | 859,859,20.0,12.0,"st. louis fr, france",french25,french,male,860,france
17 | 1022,1022,37.0,12.0,"kinshasa, democratic republic of congo",french30,french,male,1023,democratic republic of congo
18 | 1061,1061,28.0,15.0,"paris, france",french31,french,male,1062,france
19 | 1079,1079,60.0,16.0,"kabinda, democratic republic of congo",french32,french,male,1080,democratic republic of congo
20 | 1126,1126,62.0,10.0,"montreal, quebec, canada",french33,french,male,1127,canada
21 | 1159,1159,56.0,16.0,"abidjan, ivory coast",french34,french,male,1160,ivory coast
22 | 1188,1188,27.0,10.0,"toulouse, france",french35,french,male,1189,france
23 | 1305,1305,42.0,14.0,"strasbourg, france",french37,french,male,1306,france
24 | 1423,1423,22.0,8.0,"montreal, quebec, canada",french38,french,male,1424,canada
25 | 1426,1426,28.0,13.0,"pezenas, france",french39,french,male,1427,france
26 | 1473,1473,24.0,12.0,"la massana, andorra",french41,french,male,1474,andorra
27 | 1534,1534,22.0,11.0,"paris, france",french43,french,male,1535,france
28 | 1566,1566,22.0,8.0,"montreal, quebec, canada",french45,french,male,1567,canada
29 | 1642,1642,22.0,10.0,"chatenay malabry, france",french46,french,male,1643,france
30 | 1728,1728,39.0,12.0,"paris, france",french49,french,male,1729,france
31 | 1764,1764,18.0,5.0,"liege, wallonia, belgium",french51,french,male,1765,belgium
32 | 1930,1930,23.0,12.0,"dakar, senegal",french57,french,male,1931,senegal
33 | 2110,2110,37.0,11.0,"ouagadougou, burkina faso",french63,french,male,2111,burkina faso
34 |
--------------------------------------------------------------------------------
/dataframes/df_mandarin_female.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country
2 | 255,255,26.0,13.0,"shanxi province, shanxi, china",mandarin1,mandarin,female,256,china
3 | 256,256,38.0,14.0,"nanjing, china",mandarin2,mandarin,female,257,china
4 | 258,258,24.0,6.0,"shanghai, china",mandarin4,mandarin,female,259,china
5 | 259,259,31.0,12.0,"beijing, china",mandarin5,mandarin,female,260,china
6 | 260,260,28.0,12.0,"le shan, sichuan, china",mandarin6,mandarin,female,261,china
7 | 606,606,53.0,13.0,"kao-hsiung, taiwan",mandarin11,mandarin,female,607,taiwan
8 | 745,745,28.0,11.0,"tainan, taiwan",mandarin15,mandarin,female,746,taiwan
9 | 871,871,29.0,13.0,"taoyuan, taiwan",mandarin18,mandarin,female,872,taiwan
10 | 930,930,40.0,12.0,"ping tong, taiwan",mandarin20,mandarin,female,931,taiwan
11 | 983,983,38.0,9.0,"chengdu, sichuan, china",mandarin21,mandarin,female,984,china
12 | 1018,1018,39.0,11.0,"haikou, hainan, china",mandarin22,mandarin,female,1019,china
13 | 1043,1043,46.0,13.0,"kao-hsiung, taiwan",mandarin23,mandarin,female,1044,taiwan
14 | 1088,1088,21.0,10.0,"shanghai, china",mandarin24,mandarin,female,1089,china
15 | 1124,1124,28.0,13.0,"wuxi, jiangsu, china",mandarin25,mandarin,female,1125,china
16 | 1140,1140,31.0,12.0,"emei, sichuan, china",mandarin26,mandarin,female,1141,china
17 | 1518,1518,26.0,13.0,"wenzhou, zhejiang, china",mandarin31,mandarin,female,1519,china
18 | 1526,1526,23.0,12.0,"taipei, taiwan",mandarin32,mandarin,female,1527,taiwan
19 | 1577,1577,31.0,13.0,"shi jia zhuang, hebei, china",mandarin34,mandarin,female,1578,china
20 | 1578,1578,27.0,12.0,"yantai, shandong, china",mandarin35,mandarin,female,1579,china
21 | 1579,1579,32.0,10.0,"beijing, china",mandarin36,mandarin,female,1580,china
22 | 1580,1580,32.0,12.0,"huhot, nei meng gu, china",mandarin37,mandarin,female,1581,china
23 | 1581,1581,33.0,12.0,"changsha, hunan, china",mandarin38,mandarin,female,1582,china
24 | 1582,1582,24.0,12.0,"tie ling, liaoning, china",mandarin39,mandarin,female,1583,china
25 | 1614,1614,34.0,13.0,"songyuan, jilin, china",mandarin41,mandarin,female,1615,china
26 | 1615,1615,47.0,16.0,"yanbian, jilin, china",mandarin42,mandarin,female,1616,china
27 | 1616,1616,24.0,15.0,"datong, shanxi, china",mandarin43,mandarin,female,1617,china
28 | 1630,1630,42.0,12.0,"beijing, china",mandarin45,mandarin,female,1631,china
29 | 1635,1635,43.0,9.0,"jilin city, jilin, china",mandarin46,mandarin,female,1636,china
30 | 1643,1643,28.0,8.0,"beijing, china",mandarin47,mandarin,female,1644,china
31 | 1792,1792,26.0,6.0,"shaoxing, zhejiang, china",mandarin52,mandarin,female,1793,china
32 | 1816,1816,38.0,13.0,"ningbo, zhejiang, china",mandarin53,mandarin,female,1817,china
33 | 1899,1899,25.0,10.0,"chengdu, sichuan, china",mandarin54,mandarin,female,1900,china
34 | 1908,1908,25.0,13.0,"loudi, hunan, china",mandarin55,mandarin,female,1909,china
35 | 1909,1909,26.0,11.0,"baoding, hebei, china",mandarin56,mandarin,female,1910,china
36 | 2018,2018,20.0,6.0,"kunming, yunnan, china",mandarin59,mandarin,female,2019,china
37 | 2108,2108,41.0,13.0,"guiyang, guizhou, china",mandarin60,mandarin,female,2109,china
38 | 2115,2115,33.0,16.0,"shangrao, jiangxi, china",mandarin61,mandarin,female,2116,china
39 | 2128,2128,24.0,10.0,"fuzhou, fujian, china",mandarin64,mandarin,female,2129,china
40 | 2172,2172,20.0,5.0,"tianjin, hebei, china",mandarin65,mandarin,female,2173,china
41 |
--------------------------------------------------------------------------------
/dataframes/df_arabic_female.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country
2 | 10,10,38.0,12.0,"riyadh, saudi arabia",arabic1,arabic,female,11,saudi arabia
3 | 12,12,30.0,14.0,"monastir, tunisia",arabic11,arabic,female,13,tunisia
4 | 18,18,19.0,15.0,"casablanca, morocco",arabic5,arabic,female,19,morocco
5 | 20,20,18.0,15.0,"casablanca, morocco",arabic7,arabic,female,21,morocco
6 | 22,22,28.0,4.0,"jerusalem, israel",arabic9,arabic,female,23,israel
7 | 669,669,46.0,30.0,"beirut, lebanon",arabic21,arabic,female,670,lebanon
8 | 919,919,23.0,10.0,"abu dhabi, united arab emirates",arabic22,arabic,female,920,united arab emirates
9 | 1011,1011,43.0,14.0,"amman, jordan",arabic26,arabic,female,1012,jordan
10 | 1132,1132,19.0,8.0,"riyadh, saudi arabia",arabic30,arabic,female,1133,saudi arabia
11 | 1180,1180,47.0,12.0,"baghdad, iraq",arabic31,arabic,female,1181,iraq
12 | 1203,1203,28.0,11.0,"baghdad, iraq",arabic33,arabic,female,1204,iraq
13 | 1252,1252,34.0,6.0,"kuwait city, kuwait",arabic34,arabic,female,1253,kuwait
14 | 1393,1393,19.0,9.0,"shady grove, maryland, usa",arabic38,arabic,female,1394,usa
15 | 1425,1425,29.0,8.0,"beirut, lebanon",arabic43,arabic,female,1426,lebanon
16 | 1454,1454,29.0,7.0,"jiddah, saudi arabia",arabic44,arabic,female,1455,saudi arabia
17 | 1455,1455,32.0,12.0,"fort collins, colorado, usa",arabic45,arabic,female,1456,usa
18 | 1486,1486,21.0,9.0,"jiddah, saudi arabia",arabic52,arabic,female,1487,saudi arabia
19 | 1698,1698,28.0,17.0,"beirut, lebanon",arabic57,arabic,female,1699,lebanon
20 | 1768,1768,28.0,10.0,"bani walid, libya",arabic63,arabic,female,1769,libya
21 | 1777,1777,32.0,10.0,"samail, oman",arabic65,arabic,female,1778,oman
22 | 1809,1809,55.0,15.0,"kerma, sudan",arabic69,arabic,female,1810,sudan
23 | 1827,1827,29.0,21.0,"kuwait city, kuwait",arabic71,arabic,female,1828,kuwait
24 | 1831,1831,20.0,11.0,"baghdad, iraq",arabic72,arabic,female,1832,iraq
25 | 1840,1840,47.0,7.0,"nazareth, israel",arabic73,arabic,female,1841,israel
26 | 1884,1884,26.0,2.0,"riyadh, saudi arabia",arabic75,arabic,female,1885,saudi arabia
27 | 1893,1893,18.0,4.0,"ad dammam, saudi arabia",arabic77,arabic,female,1894,saudi arabia
28 | 1896,1896,19.0,3.0,"washington, district of columbia, usa",arabic78,arabic,female,1897,usa
29 | 1898,1898,24.0,13.0,"medina, saudi arabia",arabic79,arabic,female,1899,saudi arabia
30 | 1926,1926,26.0,12.0,"medina, saudi arabia",arabic84,arabic,female,1927,saudi arabia
31 | 1962,1962,24.0,14.0,"ad dammam, saudi arabia",arabic85,arabic,female,1963,saudi arabia
32 | 2004,2004,49.0,10.0,"kuwait city, kuwait",arabic87,arabic,female,2005,kuwait
33 | 2007,2007,19.0,4.0,"beirut, lebanon",arabic88,arabic,female,2008,lebanon
34 | 2021,2021,35.0,19.0,"baghdad, iraq",arabic89,arabic,female,2022,iraq
35 | 2035,2035,23.0,12.0,"tripoli, libya",arabic90,arabic,female,2036,libya
36 | 2085,2085,57.0,12.0,"riyadh, saudi arabia",arabic91,arabic,female,2086,saudi arabia
37 | 2111,2111,27.0,10.0,"baghdad, iraq",arabic93,arabic,female,2112,iraq
38 | 2113,2113,24.0,9.0,"abu dhabi, united arab emirates",arabic94,arabic,female,2114,united arab emirates
39 | 2142,2142,31.0,10.0,"kuwait city, kuwait",arabic95,arabic,female,2143,kuwait
40 | 2153,2153,19.0,4.0,"sharjah, united arab emirates",arabic97,arabic,female,2154,united arab emirates
41 | 2154,2154,19.0,4.0,"ras al khaimah, united arab emirates",arabic98,arabic,female,2155,united arab emirates
42 | 2156,2156,25.0,16.0,"ad dammam, saudi arabia",arabic99,arabic,female,2157,saudi arabia
43 | 2158,2158,40.0,10.0,"beirut, lebanon",arabic100,arabic,female,2159,lebanon
44 |
--------------------------------------------------------------------------------
/code/conv_1d_model.py:
--------------------------------------------------------------------------------
1 |
2 | from __future__ import print_function
3 | import numpy as np
4 | from sklearn.cross_validation import train_test_split
5 | from sklearn.metrics import classification_report
6 | #np.random.seed(1337) # for reproducibility
7 |
8 | from keras.preprocessing import sequence
9 | from keras.models import Sequential
10 | from keras.layers.core import Dense, Dropout, Activation, Flatten
11 | from keras.layers.normalization import BatchNormalization
12 | from keras.layers.convolutional import Convolution1D, MaxPooling1D
13 | from keras.utils import np_utils
14 |
15 |
16 | # set parameters:
17 | test_dim = 2999
18 | maxlen = 100
19 | batch_size = 100
20 | nb_filter = 64
21 | filter_length_1 = 50
22 | filter_length_2 = 25
23 | hidden_dims = 250
24 | nb_epoch = 8
25 | nb_classes = 2
26 |
27 | print('Loading data...')
28 | X = np.load('usa373_span162_mfcc_13.npy')
29 | y = np.append(np.ones(373), np.zeros(162))
30 |
31 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15)
32 |
33 | xts = X_train.shape
34 | #X_train = np.reshape(X_train, (xts[0], xts[1], 1))
35 | xtss = X_test.shape
36 | #X_test = np.reshape(X_test, (xtss[0], xtss[1], 1))
37 | yts = y_train.shape
38 | #y_train = np.reshape(y_train, (yts[0], 1))
39 | ytss = y_test.shape
40 | #y_test = np.reshape(y_test, (ytss[0], 1))
41 |
42 | print(len(X_train), 'train sequences')
43 | print(len(X_test), 'test sequences')
44 |
45 | Y_train = np_utils.to_categorical(y_train, nb_classes)
46 | Y_test = np_utils.to_categorical(y_test, nb_classes)
47 |
48 | # print('Pad sequences (samples x time)')
49 | # X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
50 | # X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
51 | # print('X_train shape:', X_train.shape)
52 | # print('X_test shape:', X_test.shape)
53 |
54 | print('Build model...')
55 | model = Sequential()
56 |
57 | # we start off with an efficient embedding layer which maps
58 | # our vocab indices into embedding_dims dimensions
59 | # model.add(Embedding(max_features, embedding_dims, input_length=maxlen))
60 | # model.add(Dropout(0.25))
61 |
62 | # we add a Convolution1D, which will learn nb_filter
63 | # word group filters of size filter_length:
64 | model.add(Convolution1D(nb_filter=nb_filter,
65 | filter_length=filter_length_1,
66 | input_shape=(test_dim, 13),
67 | border_mode='valid',
68 | activation='relu'
69 | ))
70 | # we use standard max pooling (halving the output of the previous layer):
71 | model.add(BatchNormalization())
72 |
73 | model.add(Convolution1D(nb_filter=nb_filter,
74 | filter_length=filter_length_2,
75 | border_mode='same',
76 | activation='relu'
77 | ))
78 |
79 | model.add(BatchNormalization())
80 |
81 | model.add(MaxPooling1D(pool_length=2))
82 |
83 | model.add(Convolution1D(nb_filter=nb_filter,
84 | filter_length=filter_length_2,
85 | border_mode='same',
86 | activation='relu'
87 | ))
88 |
89 | model.add(BatchNormalization())
90 |
91 | model.add(MaxPooling1D(pool_length=2))
92 |
93 | # We flatten the output of the conv layer,
94 | # so that we can add a vanilla dense layer:
95 | model.add(Flatten())
96 |
97 | # We add a vanilla hidden layer:
98 | # model.add(Dense(hidden_dims))
99 | model.add(Dropout(0.25))
100 | # model.add(Activation('relu'))
101 |
102 | # We project onto a single unit output layer, and squash it with a sigmoid:
103 | model.add(Dense(2))
104 | model.add(Activation('softmax'))
105 |
106 | model.compile(loss='binary_crossentropy',
107 | optimizer='rmsprop')
108 | model.fit(X_train, Y_train, batch_size=batch_size,
109 | nb_epoch=nb_epoch, verbose=1,
110 | validation_data=(X_test, Y_test), show_accuracy=True)
111 |
112 | #y_preds = model.predict(X_test)
113 |
114 | score = model.evaluate(X_test, Y_test, verbose=1)
115 |
116 | #print(classification_report(y_test, y_preds))
117 |
--------------------------------------------------------------------------------
/dataframes/df_arabic_male.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country
2 | 11,11,26.0,5.0,"cairo, egypt",arabic10,arabic,male,12,egypt
3 | 13,13,32.0,11.0,"baghdad, iraq",arabic12,arabic,male,14,iraq
4 | 14,14,25.0,15.0,"zabbougha, lebanon",arabic13,arabic,male,15,lebanon
5 | 15,15,18.0,2.5,"damascus, syria",arabic2,arabic,male,16,syria
6 | 16,16,24.0,9.0,"doha, qatar",arabic3,arabic,male,17,qatar
7 | 17,17,19.0,6.0,"sharjah, united arab emirates",arabic4,arabic,male,18,united arab emirates
8 | 19,19,21.0,14.5,"sanaa, yemen",arabic6,arabic,male,20,yemen
9 | 21,21,21.0,17.0,"casablanca, morocco",arabic8,arabic,male,22,morocco
10 | 427,427,47.0,11.0,"amman, jordan",arabic14,arabic,male,428,jordan
11 | 436,436,40.0,33.0,"qayrawan, tunisia",arabic15,arabic,male,437,tunisia
12 | 470,470,22.0,16.0,"meknes, morocco",arabic16,arabic,male,471,morocco
13 | 511,511,35.0,15.0,"settat, morocco",arabic17,arabic,male,512,morocco
14 | 538,538,18.0,6.0,"kuwait city, kuwait",arabic18,arabic,male,539,kuwait
15 | 624,624,43.0,18.0,"khouribga, morocco",arabic19,arabic,male,625,morocco
16 | 642,642,36.0,12.0,"chtaura, beqa valley, lebanon",arabic20,arabic,male,643,lebanon
17 | 926,926,42.0,13.0,"al mahalla, egypt",arabic23,arabic,male,927,egypt
18 | 969,969,40.0,7.0,"cairo, egypt",arabic24,arabic,male,970,egypt
19 | 1010,1010,38.0,5.0,"ramun, israel (occupied territory)",arabic25,arabic,male,1011,israel (occupied territory)
20 | 1012,1012,70.0,10.0,"jerusalem, israel",arabic27,arabic,male,1013,israel
21 | 1114,1114,30.0,12.0,"al-ayn, united arab emirates",arabic28,arabic,male,1115,united arab emirates
22 | 1115,1115,19.0,17.0,"doha, qatar",arabic29,arabic,male,1116,qatar
23 | 1202,1202,42.0,12.0,"baghdad, iraq",arabic32,arabic,male,1203,iraq
24 | 1390,1390,56.0,16.0,"jiddah, saudi arabia",arabic35,arabic,male,1391,saudi arabia
25 | 1391,1391,36.0,6.0,"jiddah, saudi arabia",arabic36,arabic,male,1392,saudi arabia
26 | 1392,1392,18.0,16.0,"jiddah, saudi arabia",arabic37,arabic,male,1393,saudi arabia
27 | 1404,1404,22.0,5.0,"jiddah, saudi arabia",arabic39,arabic,male,1405,saudi arabia
28 | 1419,1419,19.0,13.0,"jiddah, saudi arabia",arabic40,arabic,male,1420,saudi arabia
29 | 1420,1420,25.0,7.0,"riyadh, saudi arabia",arabic41,arabic,male,1421,saudi arabia
30 | 1421,1421,21.0,13.0,"jiddah, saudi arabia",arabic42,arabic,male,1422,saudi arabia
31 | 1456,1456,28.0,13.0,"jiddah, saudi arabia",arabic46,arabic,male,1457,saudi arabia
32 | 1457,1457,39.0,16.0,"jiddah, saudi arabia",arabic47,arabic,male,1458,saudi arabia
33 | 1458,1458,29.0,12.0,"mecca, saudi arabia",arabic48,arabic,male,1459,saudi arabia
34 | 1459,1459,26.0,8.0,"medina, saudi arabia",arabic49,arabic,male,1460,saudi arabia
35 | 1460,1460,36.0,22.0,"jiddah, saudi arabia",arabic50,arabic,male,1461,saudi arabia
36 | 1485,1485,23.0,12.0,"jiddah, saudi arabia",arabic51,arabic,male,1486,saudi arabia
37 | 1525,1525,36.0,17.0,"rabat, morocco",arabic53,arabic,male,1526,morocco
38 | 1564,1564,22.0,7.0,"baghdad, iraq",arabic54,arabic,male,1565,iraq
39 | 1568,1568,70.0,33.0,"beirut, lebanon",arabic55,arabic,male,1569,lebanon
40 | 1676,1676,43.0,12.0,"ain defla, algeria",arabic56,arabic,male,1677,algeria
41 | 1699,1699,34.0,9.0,"baghdad, iraq",arabic58,arabic,male,1700,iraq
42 | 1701,1701,55.0,9.0,"irbid, jordan",arabic59,arabic,male,1702,jordan
43 | 1710,1710,60.0,14.0,"cairo, egypt",arabic60,arabic,male,1711,egypt
44 | 1752,1752,20.0,7.0,"manama, bahrain",arabic61,arabic,male,1753,bahrain
45 | 1767,1767,19.0,18.0,"ad dammam, saudi arabia",arabic62,arabic,male,1768,saudi arabia
46 | 1776,1776,23.0,6.0,"riyadh, saudi arabia",arabic64,arabic,male,1777,saudi arabia
47 | 1783,1783,50.0,14.0,"medina, saudi arabia",arabic66,arabic,male,1784,saudi arabia
48 | 1784,1784,43.0,22.0,"nasriah, iraq",arabic67,arabic,male,1785,iraq
49 | 1804,1804,20.0,6.0,"london, uk",arabic68,arabic,male,1805,uk
50 | 1826,1826,47.0,16.0,"casablanca, morocco",arabic70,arabic,male,1827,morocco
51 | 1869,1869,18.0,4.0,"alexandria, egypt",arabic74,arabic,male,1870,egypt
52 | 1891,1891,25.0,4.0,"jiddah, saudi arabia",arabic76,arabic,male,1892,saudi arabia
53 | 1900,1900,25.0,11.0,"najran, saudi arabia",arabic80,arabic,male,1901,saudi arabia
54 | 1913,1913,22.0,17.0,"riyadh, saudi arabia",arabic81,arabic,male,1914,saudi arabia
55 | 1916,1916,42.0,12.0,"damascus, syria",arabic82,arabic,male,1917,syria
56 | 1919,1919,22.0,21.0,"riyadh, saudi arabia",arabic83,arabic,male,1920,saudi arabia
57 | 1963,1963,28.0,3.0,"cairo, egypt",arabic86,arabic,male,1964,egypt
58 | 2086,2086,36.0,10.0,"baghdad, iraq",arabic92,arabic,male,2087,iraq
59 | 2152,2152,21.0,7.0,"riyadh, saudi arabia",arabic96,arabic,male,2153,saudi arabia
60 | 2159,2159,21.0,10.0,"doha, qatar",arabic101,arabic,male,2160,qatar
61 | 2160,2160,22.0,3.0,"jiddah, saudi arabia",arabic102,arabic,male,2161,saudi arabia
62 |
--------------------------------------------------------------------------------
/code/conv_1d_model_aws.py:
--------------------------------------------------------------------------------
1 |
2 | from __future__ import print_function
3 | import numpy as np
4 | from sklearn.cross_validation import train_test_split
5 | from sklearn.metrics import classification_report
6 | np.random.seed(1337) # for reproducibility
7 |
8 | from keras.preprocessing import sequence
9 | from keras.layers.noise import GaussianNoise
10 | from keras.models import Sequential
11 | from keras.layers.core import Dense, Dropout, Activation, Flatten
12 | from keras.layers.normalization import BatchNormalization
13 | from keras.layers.convolutional import Convolution1D, MaxPooling1D, AveragePooling1D
14 | from keras.utils import np_utils
15 |
16 |
# ---------------------------------------------------------------------------
# Hyper-parameters
# ---------------------------------------------------------------------------
test_dim = 999          # time-axis length declared in the first layer's input_shape
maxlen = 100            # not referenced anywhere below
batch_size = 50
nb_filter = 512         # number of filters in every convolutional layer
filter_length_1 = 100   # kernel width of the first convolution
filter_length_2 = 30    # kernel width of the four middle convolutions
filter_length_3 = 15    # kernel width of the last convolution
hidden_dims = 10        # not referenced anywhere below
nb_epoch = 5
nb_classes = 3

# ---------------------------------------------------------------------------
# Data
# ---------------------------------------------------------------------------
print('Loading data...')
X = np.load('top_3_100_split_mfcc.npy')
y = np.load('top_3_100_split_y.npy')

# Hold out 15% of the samples; the same split is used both as validation data
# during fitting and for the final classification report.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15)

# Disabled helper for the case where the arrays on disk are 2-D instead of 3-D
# (adds a trailing axis of length 1 to the features and the labels):
#
#   xts = X_train.shape
#   X_train = np.reshape(X_train, (xts[0], xts[1], 1))
#   xtss = X_test.shape
#   X_test = np.reshape(X_test, (xtss[0], xtss[1], 1))
#   yts = y_train.shape
#   y_train = np.reshape(y_train, (yts[0], 1))
#   ytss = y_test.shape
#   y_test = np.reshape(y_test, (ytss[0], 1))

print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')

# One-hot encode the integer class labels for categorical cross-entropy.
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
print('Build model...')
model = Sequential()

# Input stage: a wide convolution over (test_dim, 13) MFCC frames, followed by
# batch normalisation of its activations.  No pooling after this first stage.
model.add(Convolution1D(nb_filter=nb_filter,
                        filter_length=filter_length_1,
                        input_shape=(test_dim, 13),
                        init='glorot_normal',
                        border_mode='valid',
                        activation='relu'))
model.add(BatchNormalization())

# Remaining stages all share the pattern
#     [Dropout(.1)] -> Convolution1D -> BatchNormalization -> MaxPooling1D(2)
# Each tuple is (kernel width, whether a Dropout layer precedes the stage).
for stage_filter_length, dropout_first in [(filter_length_2, False),
                                           (filter_length_2, False),
                                           (filter_length_2, False),
                                           (filter_length_2, True),
                                           (filter_length_3, True)]:
    if dropout_first:
        # Dropout reduces overfitting
        model.add(Dropout(.1))
    model.add(Convolution1D(nb_filter=nb_filter,
                            filter_length=stage_filter_length,
                            border_mode='valid',
                            activation='relu'))
    model.add(BatchNormalization())
    # standard max pooling with a window of 2 (halves the sequence length)
    model.add(MaxPooling1D(pool_length=2))

# Flatten the convolutional feature maps so a dense layer can consume them.
model.add(Flatten())

# Output layer: one unit per class, turned into probabilities by a softmax.
model.add(Dense(nb_classes))
model.add(Activation('softmax'))

# ---------------------------------------------------------------------------
# Training and evaluation
# ---------------------------------------------------------------------------
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=["accuracy"])
model.fit(X_train, Y_train,
          batch_size=batch_size,
          nb_epoch=nb_epoch,
          verbose=1,
          validation_data=(X_test, Y_test))

# print report of recall, precision, f1 score
y_pred = model.predict_classes(X_test)
print(classification_report(y_test, y_pred))
144 |
--------------------------------------------------------------------------------
/dataframes/df_spanish_female.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country
2 | 324,324,49.0,28.0,"lima, peru",spanish11,spanish,female,325,peru
3 | 326,326,29.0,11.0,"duitama, colombia",spanish13,spanish,female,327,colombia
4 | 327,327,34.0,15.0,"pereira, colombia",spanish14,spanish,female,328,colombia
5 | 330,330,22.0,9.0,"puebla, mexico",spanish17,spanish,female,331,mexico
6 | 332,332,25.0,17.0,"havana, cuba",spanish19,spanish,female,333,cuba
7 | 336,336,32.0,11.0,"madrid, spain",spanish22,spanish,female,337,spain
8 | 342,342,20.0,18.0,"san miguel, el salvador",spanish28,spanish,female,343,el salvador
9 | 343,343,17.0,16.0,"bogota, colombia",spanish3,spanish,female,344,colombia
10 | 344,344,21.0,17.0,"cabanas, el salvador",spanish4,spanish,female,345,el salvador
11 | 345,345,52.0,13.0,"bogota, colombia",spanish5,spanish,female,346,colombia
12 | 347,347,21.0,20.0,"bogota, colombia",spanish7,spanish,female,348,colombia
13 | 448,448,31.0,7.0,"guayaquil, ecuador",spanish31,spanish,female,449,ecuador
14 | 574,574,50.0,17.0,"tegucigalpa, honduras",spanish37,spanish,female,575,honduras
15 | 711,711,80.0,18.0,"bogota, colombia",spanish42,spanish,female,712,colombia
16 | 714,714,47.0,12.0,"santiago-dr, dominican republic",spanish45,spanish,female,715,dominican republic
17 | 715,715,37.0,4.0,"la romana, dominican republic",spanish44,spanish,female,716,dominican republic
18 | 717,717,25.0,12.0,"santo domingo, dominican republic",spanish47,spanish,female,718,dominican republic
19 | 776,776,29.0,21.0,"popayan, cauca, colombia",spanish52,spanish,female,777,colombia
20 | 907,907,31.0,5.0,"mayaguez, puerto rico",spanish59,spanish,female,908,puerto rico
21 | 914,914,18.0,9.0,"santiago-dr, dominican republic",spanish60,spanish,female,915,dominican republic
22 | 924,924,63.0,19.0,"buenos aires, argentina",spanish62,spanish,female,925,argentina
23 | 958,958,32.0,10.0,"la coruna, spain",spanish63,spanish,female,959,spain
24 | 1110,1110,48.0,14.0,"santa marta, colombia",spanish67,spanish,female,1111,colombia
25 | 1228,1228,20.0,7.0,"caracas, venezuela",spanish69,spanish,female,1229,venezuela
26 | 1229,1229,30.0,10.0,"bogota, colombia",spanish70,spanish,female,1230,colombia
27 | 1237,1237,20.0,5.0,"miami, florida, usa",spanish72,spanish,female,1238,usa
28 | 1272,1272,28.0,5.0,"lima, peru",spanish75,spanish,female,1273,peru
29 | 1337,1337,55.0,5.0,"lima, peru",spanish78,spanish,female,1338,peru
30 | 1343,1343,70.0,22.0,"lamas, peru",spanish79,spanish,female,1344,peru
31 | 1345,1345,77.0,75.0,"bogota, colombia",spanish80,spanish,female,1346,colombia
32 | 1346,1346,63.0,7.0,"san juan, puerto rico",spanish81,spanish,female,1347,puerto rico
33 | 1397,1397,19.0,4.0,"buenos aires, argentina",spanish84,spanish,female,1398,argentina
34 | 1398,1398,44.0,6.0,"buenos aires, argentina",spanish85,spanish,female,1399,argentina
35 | 1430,1430,23.0,3.0,"caracas, venezuela",spanish90,spanish,female,1431,venezuela
36 | 1432,1432,20.0,7.0,"caracas, venezuela",spanish91,spanish,female,1433,venezuela
37 | 1435,1435,29.0,18.0,"santa rosa, honduras",spanish93,spanish,female,1436,honduras
38 | 1443,1443,31.0,21.0,"juarez, mexico",spanish95,spanish,female,1444,mexico
39 | 1448,1448,29.0,12.0,"arequipa, peru",spanish96,spanish,female,1449,peru
40 | 1449,1449,52.0,19.0,"bogota, colombia",spanish97,spanish,female,1450,colombia
41 | 1505,1505,20.0,4.0,"lima, peru",spanish99,spanish,female,1506,peru
42 | 1507,1507,30.0,26.0,"iquitos, peru",spanish100,spanish,female,1508,peru
43 | 1573,1573,39.0,19.0,"la union, el salvador",spanish101,spanish,female,1574,el salvador
44 | 1599,1599,28.0,20.0,"san miguel, el salvador",spanish104,spanish,female,1600,el salvador
45 | 1612,1612,41.0,10.0,"jerez de la frontera, spain",spanish105,spanish,female,1613,spain
46 | 1731,1731,22.0,8.0,"seville, spain",spanish114,spanish,female,1732,spain
47 | 1743,1743,58.0,8.0,"la paz, bolivia",spanish115,spanish,female,1744,bolivia
48 | 1805,1805,48.0,13.0,"monterrey, mexico",spanish118,spanish,female,1806,mexico
49 | 1806,1806,40.0,11.0,"madrid, spain",spanish119,spanish,female,1807,spain
50 | 1822,1822,25.0,2.0,"bogota, colombia",spanish120,spanish,female,1823,colombia
51 | 1830,1830,26.0,10.0,"toluca, mexico",spanish122,spanish,female,1831,mexico
52 | 1833,1833,57.0,14.0,"ilbague, colombia",spanish123,spanish,female,1834,colombia
53 | 1852,1852,54.0,5.0,"colon, panama",spanish126,spanish,female,1853,panama
54 | 1868,1868,30.0,7.0,"lima, peru",spanish128,spanish,female,1869,peru
55 | 1873,1873,47.0,30.0,"havana, cuba",spanish129,spanish,female,1874,cuba
56 | 1903,1903,50.0,15.0,"rio piedras, puerto rico",spanish132,spanish,female,1904,puerto rico
57 | 1920,1920,41.0,13.0,"la union, el salvador",spanish134,spanish,female,1921,el salvador
58 | 1922,1922,41.0,22.0,"santiago de cuba, cuba",spanish135,spanish,female,1923,cuba
59 | 1978,1978,24.0,21.0,"santa ana, el salvador",spanish137,spanish,female,1979,el salvador
60 | 1979,1979,24.0,16.0,"san miguel, el salvador",spanish138,spanish,female,1980,el salvador
61 | 1984,1984,37.0,31.0,"cochabamba, bolivia",spanish140,spanish,female,1985,bolivia
62 | 2015,2015,59.0,8.0,"san salvador, el salvador",spanish142,spanish,female,2016,el salvador
63 | 2050,2050,33.0,23.0,"florencia, zacatecas, mexico",spanish144,spanish,female,2051,mexico
64 | 2051,2051,37.0,21.0,"florencia, zacatecas, mexico",spanish145,spanish,female,2052,mexico
65 | 2095,2095,58.0,50.0,"bilwas karma, rio coco, nicaragua",spanish151,spanish,female,2096,nicaragua
66 | 2141,2141,65.0,8.0,"montevideo, uruguay",spanish156,spanish,female,2142,uruguay
67 | 2143,2143,19.0,3.0,"mexico city, mexico",spanish157,spanish,female,2144,mexico
68 | 2147,2147,54.0,37.0,"santurce, puerto rico",spanish158,spanish,female,2148,puerto rico
69 | 2155,2155,56.0,32.0,"leon, nicaragua",spanish161,spanish,female,2156,nicaragua
70 |
--------------------------------------------------------------------------------
/code/testing.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | from features import mfcc
4 | from features import logfbank
5 | import scipy.io.wavfile as wav
6 | from scipy.io.wavfile import write as wav_write
7 | import librosa
8 | import scikits.samplerate
9 | import os
10 |
11 |
12 | '''
13 | mfcc(signal, samplerate=16000, winlen=0.025, winstep=0.01, numcep=13, nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97, ceplifter=22, appendEnergy=True)
14 | '''
15 | # read in wav file, get out signal (np array) and sampling rate (int)
def read_in_audio(filename):
    """Load a wav file and return ``(signal, sample_rate)``.

    The order is the reverse of what ``wav.read`` yields (rate first), so
    callers unpack the sample array before the integer rate.
    """
    loaded = wav.read(filename)
    sample_rate = loaded[0]
    samples = loaded[1]
    return samples, sample_rate
19 |
20 |
21 | # read in signal, take absolute value and slice seconds 1-3 from beginning
def get_two_secs(filename):
    """Return the rectified samples between second 1 and second 3 of a wav file.

    The absolute value is taken over the whole signal first; the slice then
    keeps samples ``rate`` (inclusive) up to ``3 * rate`` (exclusive).
    """
    samples, sample_rate = read_in_audio(filename)
    first_sample = sample_rate
    end_sample = 3 * sample_rate
    return np.abs(samples)[first_sample:end_sample]
27 |
28 | # calculates moving average for a specified window (number of samples)
def take_moving_average(sig, window_width):
    """Moving average of ``sig`` over windows of ``window_width`` samples.

    Uses a running sum with a leading zero so that each window total is the
    difference of two running-sum entries.  The result has
    ``len(sig) - window_width + 1`` elements.
    """
    running_total = np.cumsum(np.insert(sig, 0, 0))
    window_end = running_total[window_width:]
    window_start = running_total[:-window_width]
    return (window_end - window_start) / float(window_width)
33 |
34 | # read in signal, change sample rate to outrate (samples/sec), use write_wav=True to save wav file to disk
def downsample(filename, outrate=8000, write_wav = False):
    """Resample a wav file to ``outrate`` samples per second.

    With ``write_wav`` false (the default) the resampled signal and
    ``outrate`` are returned as a tuple.  With ``write_wav`` true the signal
    is instead written to ``'<filename>_down_<outrate>.wav'`` and nothing is
    returned.
    """
    original_rate, signal = wav.read(filename)
    resampled = librosa.core.resample(signal, original_rate, outrate, scale=True)
    if write_wav:
        wav_write('{}_down_{}.wav'.format(filename, outrate), outrate, resampled)
        return None
    return resampled, outrate
42 |
43 | # change total number of samps for downsampled file to n_samps by trimming or zero-padding and standardize them
def make_standard_length(filename, n_samps=240000):
    """Downsample a wav file, force it to ``n_samps`` samples and standardize it.

    The file is resampled with ``downsample`` (default output rate), then
    trimmed or zero-padded by ``librosa.util.fix_length`` and finally shifted
    and scaled to zero mean and unit standard deviation.

    Parameters
    ----------
    filename : path of the wav file to read.
    n_samps : number of samples in the returned signal.

    Returns
    -------
    The standardized signal.  For a constant signal (standard deviation 0)
    the centred signal is returned as-is instead of dividing by zero, which
    would fill the output with NaN.
    """
    # The rate returned by downsample is not needed here.
    down_sig, _ = downsample(filename)
    fixed_sig = librosa.util.fix_length(down_sig, n_samps)
    centered = fixed_sig - np.mean(fixed_sig)
    spread = np.std(fixed_sig)
    if spread == 0:
        return centered
    # The original line carried an extra closing parenthesis here, which made
    # the whole module fail to parse.
    return centered / spread
49 |
50 | # from a folder containing wav files, normalize each, divide into num_splits-1 chunks and write the resulting np.arrays to a single matrix
def make_split_audio_array(folder, num_splits = 5):
    """Build a 2-D array of overlapping windows from every wav file in ``folder``.

    Each file is standardized with ``make_standard_length`` and divided into
    ``num_splits`` equal chunks.  ``num_splits - 1`` windows are then taken,
    each spanning two consecutive chunks, so neighbouring windows overlap by
    one chunk.

    Parameters
    ----------
    folder : directory scanned for entries whose name ends in ``'wav'``.
    num_splits : number of chunks each signal is divided into.

    Returns
    -------
    Array with one row per window.  An empty ``(0, 0)`` array is returned when
    the folder contains no wav files.
    """
    windows = []
    for filename in os.listdir(folder):
        if not filename.endswith('wav'):
            continue
        # os.listdir yields bare names, so the folder must be prepended for
        # the file to be found when the working directory is elsewhere.
        normed_sig = make_standard_length(os.path.join(folder, filename))
        # Floor division keeps the slice bounds integral on Python 3 as well.
        chunk = normed_sig.shape[0] // num_splits
        for i in range(num_splits - 1):
            windows.append(normed_sig[i * chunk:(i + 2) * chunk])
    if not windows:
        return np.empty((0, 0))
    stacked = np.array(windows)
    return stacked.reshape(stacked.shape[0], -1)
62 |
63 | # for input wav file outputs (13, 2999) mfcc np array
def make_normed_mfcc(filename, outrate=8000):
    """Return the transposed MFCC matrix of a standardized wav file.

    The signal comes from ``make_standard_length`` and the features from
    ``mfcc``; the result is transposed before being returned.

    NOTE(review): ``outrate`` is only handed to ``mfcc`` as the sample rate.
    The signal itself is produced by ``make_standard_length(filename)`` with
    its defaults, so a non-default ``outrate`` would not change the actual
    resampling -- confirm this is intended.
    """
    standardized = make_standard_length(filename)
    return mfcc(standardized, outrate).T
69 |
70 | # make mfcc np array from wav file using librosa package
def make_librosa_mfcc(filename):
    """Load an audio file with librosa and return its 13-coefficient MFCCs."""
    audio, sample_rate = librosa.load(filename)
    return librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=13)
75 |
76 | # make mfcc np array from wav file using speech features package
def make_mfcc(filename):
    """Read a wav file and return the transpose of its ``mfcc`` feature matrix.

    The file is read at its native sample rate; no resampling or
    normalisation is applied.
    """
    rate_and_signal = wav.read(filename)
    features_by_frame = mfcc(rate_and_signal[1], rate_and_signal[0])
    return features_by_frame.T
82 |
83 | # for folder containing wav files, output numpy array of normed mfcc
def make_class_array(folder):
    """Stack the normalized MFCCs of every file in ``folder`` into one array.

    ``make_normed_mfcc`` is called on every directory entry and the results
    are stacked along a new leading axis.  The last two axes of the stack are
    then swapped, so the per-file matrices come out transposed relative to
    what ``make_normed_mfcc`` returned.

    Parameters
    ----------
    folder : directory whose entries are all passed to ``make_normed_mfcc``.

    Returns
    -------
    3-D array of shape ``(n_files, d2, d1)``, where ``(d1, d2)`` is the shape
    returned by ``make_normed_mfcc`` for each file.
    """
    # os.listdir yields bare names; join them with the folder so the files
    # are found regardless of the current working directory.
    per_file = [make_normed_mfcc(os.path.join(folder, filename))
                for filename in os.listdir(folder)]
    class_array = np.array(per_file)
    # Swap the two trailing axes.  The previous np.reshape call produced the
    # same shape but reinterpreted the flat buffer, which mixes values from
    # different rows instead of transposing each matrix.
    return np.ascontiguousarray(np.swapaxes(class_array, 1, 2))
91 |
92 | # read in wav file, output (1,13) numpy array of mean mfccs for each of 13 features
def make_mean_mfcc(filename):
    """Return the per-coefficient mean of a wav file's MFCCs, or ``None``.

    The file is read with ``wav.read``, passed through ``mfcc`` and averaged
    over axis 0, giving a 1-D array with one mean per feature column.

    This stays best-effort: if reading or feature extraction fails, a warning
    naming the file is issued and ``None`` is returned.  Unlike a bare
    ``except``, KeyboardInterrupt and SystemExit are no longer swallowed.
    """
    import warnings

    try:
        (rate, sig) = wav.read(filename)
        mfcc_feat = mfcc(sig, rate)
        return np.mean(mfcc_feat, axis=0)
    except Exception as exc:
        warnings.warn('make_mean_mfcc: skipping {!r} ({})'.format(filename, exc))
        return None
101 |
102 | # write new csv corresponding to dataframe of given language and gender
def make_df_language_gender(df, language, gender):
    """Write the rows of ``df`` for one language and one sex to a csv file.

    Rows are kept when ``native_language`` equals ``language`` and ``sex``
    equals ``gender``.  The result is saved, index included, as
    ``df_<language>_<gender>.csv`` in the current working directory.
    """
    speaks_language = df['native_language'] == language
    has_gender = df['sex'] == gender
    selection = df[speaks_language & has_gender]
    selection.to_csv('df_{}_{}.csv'.format(language, gender))
106 |
107 | # write new directories to disk containing the male and female speakers from the most common languages
def make_folders_from_csv():
    """Create ``<lang>/<lang>_male`` and ``<lang>/<lang>_female`` directories.

    Directories are created relative to the current working directory for
    each of the fifteen hard-coded languages.  ``os.makedirs`` raises if a
    target directory already exists.
    """
    languages = ('english', 'spanish', 'arabic', 'mandarin', 'french',
                 'german', 'korean', 'russian', 'portuguese', 'dutch',
                 'turkish', 'italian', 'polish', 'japanese', 'vietnamese')
    for language in languages:
        for template in ('{}/{}_male', '{}/{}_female'):
            os.makedirs(template.format(language, language))
113 |
114 | # copy files to the corresponding directories
def copy_files_from_csv():
    """Copy each speaker's wav file into its language/sex sub-directory.

    For every hard-coded language, ``df_<lang>_male.csv`` and
    ``df_<lang>_female.csv`` are read from the current working directory.
    Each name in their ``filename`` column is copied from
    ``big_langs/<lang>/<name>.wav`` to
    ``big_langs/<lang>/<lang>_<sex>/<name>.wav``.

    Two defects of the earlier version are fixed: ``shutil`` was used without
    being imported, and the male file list was built but never copied.

    Raises whatever ``pd.read_csv`` or ``shutil.copy2`` raise when a csv, a
    source file or a destination directory is missing.
    """
    # Imported locally because this module's import block has no shutil.
    import shutil

    top_15_langs = ['english', 'spanish', 'arabic', 'mandarin', 'french',
                    'german', 'korean', 'russian', 'portuguese', 'dutch',
                    'turkish', 'italian', 'polish', 'japanese', 'vietnamese']
    for lang in top_15_langs:
        for sex in ('male', 'female'):
            names = pd.read_csv('df_{}_{}.csv'.format(lang, sex))['filename'].values
            for filename in names:
                shutil.copy2('big_langs/{}/{}.wav'.format(lang, filename),
                             'big_langs/{}/{}_{}/{}.wav'.format(lang, lang, sex, filename))
124 |
125 | # input folder of wav files, output pandas dataframe of mean mfcc values
def make_mean_mfcc_df(folder):
    """Return a DataFrame with one row of mean MFCC values per file in ``folder``.

    Every directory entry is read with ``wav.read``, converted with ``mfcc``
    and averaged over axis 0; the flattened means are stacked row-wise.

    Parameters
    ----------
    folder : directory whose entries are all read as wav files.

    Returns
    -------
    ``pandas.DataFrame`` with one row per file and one column per feature.

    Raises
    ------
    ValueError from ``np.vstack`` when the folder is empty, and whatever
    ``wav.read`` raises for an entry that is not a readable wav file.
    """
    rows = []
    for filename in os.listdir(folder):
        # os.listdir yields bare names; prepend the folder so the file is
        # found regardless of the current working directory.
        (rate, sig) = wav.read(os.path.join(folder, filename))
        mfcc_feat = mfcc(sig, rate)
        rows.append(np.mean(mfcc_feat, axis=0).ravel())
    return pd.DataFrame(np.vstack(rows))
138 |
--------------------------------------------------------------------------------
/dataframes/df_spanish_male.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country
2 | 322,322,24.0,17.0,"caracas, venezuela",spanish1,spanish,male,323,venezuela
3 | 323,323,48.0,47.0,"san luis de la reina, el salvador",spanish10,spanish,male,324,el salvador
4 | 325,325,25.0,8.0,"oviedo, spain",spanish12,spanish,male,326,spain
5 | 328,328,54.0,4.0,"san juan, puerto rico",spanish15,spanish,male,329,puerto rico
6 | 329,329,44.0,25.0,"buenos aires, argentina",spanish16,spanish,male,330,argentina
7 | 331,331,22.0,17.0,"la paz, bolivia",spanish18,spanish,male,332,bolivia
8 | 333,333,20.0,12.0,"puerto la cruz, venezuela",spanish2,spanish,male,334,venezuela
9 | 334,334,45.0,30.0,"managua, nicaragua",spanish20,spanish,male,335,nicaragua
10 | 335,335,25.0,18.0,"santiago, chile",spanish21,spanish,male,336,chile
11 | 337,337,22.0,7.0,"santiago, chile",spanish23,spanish,male,338,chile
12 | 338,338,27.0,5.0,"mexico city, mexico",spanish24,spanish,male,339,mexico
13 | 339,339,26.0,21.0,"san jose, costa rica",spanish25,spanish,male,340,costa rica
14 | 340,340,18.0,6.0,"burgos, spain",spanish26,spanish,male,341,spain
15 | 341,341,34.0,14.0,"zaragoza, spain",spanish27,spanish,male,342,spain
16 | 346,346,20.0,19.0,"lima, peru",spanish6,spanish,male,347,peru
17 | 348,348,21.0,12.0,"san salvador, el salvador",spanish8,spanish,male,349,el salvador
18 | 349,349,20.0,17.0,"araure, venezuela",spanish9,spanish,male,350,venezuela
19 | 409,409,18.0,10.0,"maracaibo, zulia, venezuela",spanish29,spanish,male,410,venezuela
20 | 440,440,18.0,5.0,"buenos aires, argentina",spanish30,spanish,male,441,argentina
21 | 449,449,26.0,22.0,"santa cruz, bolivia",spanish32,spanish,male,450,bolivia
22 | 466,466,18.0,15.0,"caracas, venezuela",spanish33,spanish,male,467,venezuela
23 | 475,475,28.0,11.0,"cartagena, spain",spanish34,spanish,male,476,spain
24 | 478,478,28.0,17.0,"mexico city, mexico",spanish35,spanish,male,479,mexico
25 | 498,498,27.0,25.0,"montevideo, uruguay",spanish36,spanish,male,499,uruguay
26 | 575,575,53.0,13.0,"tegucigalpa, honduras",spanish38,spanish,male,576,honduras
27 | 613,613,20.0,3.0,"pamplona, spain",spanish39,spanish,male,614,spain
28 | 643,643,20.0,6.0,"cartago, costa rica",spanish40,spanish,male,644,costa rica
29 | 705,705,25.0,6.0,"bonao, dominican republic",spanish41,spanish,male,706,dominican republic
30 | 713,713,43.0,16.0,"santiago-dr, dominican republic",spanish43,spanish,male,714,dominican republic
31 | 716,716,33.0,14.0,"santo domingo, dominican republic",spanish46,spanish,male,717,dominican republic
32 | 730,730,21.0,6.0,"bilbao, spain",spanish48,spanish,male,731,spain
33 | 731,731,31.0,6.0,"tandil, argentina",spanish49,spanish,male,732,argentina
34 | 751,751,27.0,15.0,"santo domingo, dominican republic",spanish50,spanish,male,752,dominican republic
35 | 752,752,20.0,6.0,"caguas, puerto rico",spanish51,spanish,male,753,puerto rico
36 | 796,796,33.0,6.0,"bogota, colombia",spanish53,spanish,male,797,colombia
37 | 799,799,25.0,18.0,"vina del mar, chile",spanish54,spanish,male,800,chile
38 | 809,809,36.0,9.0,"tandil, argentina",spanish55,spanish,male,810,argentina
39 | 837,837,22.0,8.0,"monterrey, mexico",spanish56,spanish,male,838,mexico
40 | 885,885,33.0,11.0,"montevideo, uruguay",spanish57,spanish,male,886,uruguay
41 | 905,905,31.0,18.0,"bogota, colombia",spanish58,spanish,male,906,colombia
42 | 923,923,67.0,39.0,"azuaga, spain",spanish61,spanish,male,924,spain
43 | 961,961,28.0,12.0,"medellin, colombia",spanish64,spanish,male,962,colombia
44 | 1068,1068,32.0,12.0,"barranquilla, colombia",spanish65,spanish,male,1069,colombia
45 | 1076,1076,30.0,12.0,"san salvador, el salvador",spanish66,spanish,male,1077,el salvador
46 | 1226,1226,27.0,14.0,"san fernando, chile",spanish68,spanish,male,1227,chile
47 | 1230,1230,24.0,6.0,"rosario, argentina",spanish71,spanish,male,1231,argentina
48 | 1239,1239,19.0,15.0,"merida, venezuela",spanish73,spanish,male,1240,venezuela
49 | 1248,1248,44.0,14.0,"zaragoza, spain",spanish74,spanish,male,1249,spain
50 | 1289,1289,27.0,10.0,"la paz, bolivia",spanish76,spanish,male,1290,bolivia
51 | 1300,1300,45.0,13.0,"jalisco, mexico",spanish77,spanish,male,1301,mexico
52 | 1395,1395,49.0,14.0,"la paz, bolivia",spanish82,spanish,male,1396,bolivia
53 | 1396,1396,48.0,14.0,"la paz, bolivia",spanish83,spanish,male,1397,bolivia
54 | 1399,1399,23.0,4.0,"buenos aires, argentina",spanish86,spanish,male,1400,argentina
55 | 1400,1400,55.0,9.0,"la paz, bolivia",spanish87,spanish,male,1401,bolivia
56 | 1401,1401,53.0,25.0,"la paz, bolivia",spanish88,spanish,male,1402,bolivia
57 | 1424,1424,22.0,13.0,"monterrey, mexico",spanish89,spanish,male,1425,mexico
58 | 1434,1434,54.0,9.0,"cardenas, cuba",spanish92,spanish,male,1435,cuba
59 | 1438,1438,19.0,6.0,"bogota, colombia",spanish94,spanish,male,1439,colombia
60 | 1450,1450,34.0,29.0,"veracruz, mexico",spanish98,spanish,male,1451,mexico
61 | 1574,1574,31.0,26.0,"cochabamba, bolivia",spanish102,spanish,male,1575,bolivia
62 | 1583,1583,46.0,15.0,"san miguel, el salvador",spanish103,spanish,male,1584,el salvador
63 | 1620,1620,21.0,5.0,"lima, peru",spanish106,spanish,male,1621,peru
64 | 1621,1621,36.0,14.0,"san salvador, el salvador",spanish107,spanish,male,1622,el salvador
65 | 1628,1628,36.0,24.0,"medellin, colombia",spanish108,spanish,male,1629,colombia
66 | 1659,1659,21.0,16.0,"maracay, venezuela",spanish109,spanish,male,1660,venezuela
67 | 1679,1679,20.0,18.0,"caracas, venezuela",spanish110,spanish,male,1680,venezuela
68 | 1688,1688,55.0,33.0,"santiago, chile",spanish111,spanish,male,1689,chile
69 | 1692,1692,25.0,15.0,"la paz, bolivia",spanish112,spanish,male,1693,bolivia
70 | 1694,1694,19.0,8.0,"cochabamba, bolivia",spanish113,spanish,male,1695,bolivia
71 | 1746,1746,37.0,6.0,"bogota, colombia",spanish116,spanish,male,1747,colombia
72 | 1796,1796,46.0,12.0,"madrid, spain",spanish117,spanish,male,1797,spain
73 | 1825,1825,34.0,21.0,"pasaquina, el salvador",spanish121,spanish,male,1826,el salvador
74 | 1834,1834,22.0,5.0,"santa cruz, bolivia",spanish124,spanish,male,1835,bolivia
75 | 1839,1839,23.0,9.0,"lima, peru",spanish125,spanish,male,1840,peru
76 | 1859,1859,34.0,19.0,"san salvador, el salvador",spanish127,spanish,male,1860,el salvador
77 | 1890,1890,27.0,21.0,"guayaquil, ecuador",spanish130,spanish,male,1891,ecuador
78 | 1902,1902,56.0,25.0,"havana, cuba",spanish131,spanish,male,1903,cuba
79 | 1915,1915,38.0,19.0,"cochabamba, bolivia",spanish133,spanish,male,1916,bolivia
80 | 1948,1948,49.0,8.0,"santiago, chile",spanish136,spanish,male,1949,chile
81 | 1980,1980,50.0,38.0,"guatemala city, guatemala",spanish139,spanish,male,1981,guatemala
82 | 2008,2008,24.0,12.0,"la union, el salvador",spanish141,spanish,male,2009,el salvador
83 | 2025,2025,45.0,6.0,"oviedo, spain",spanish143,spanish,male,2026,spain
84 | 2056,2056,25.0,18.0,"las palmas, oaxaca, mexico",spanish146,spanish,male,2057,mexico
85 | 2077,2077,26.0,15.0,"puerto_cabezas, nicaragua",spanish147,spanish,male,2078,nicaragua
86 | 2078,2078,19.0,11.0,"la ceiba, honduras",spanish148,spanish,male,2079,honduras
87 | 2090,2090,23.0,22.0,"bilwi, puerto cabezas, nicaragua",spanish149,spanish,male,2091,nicaragua
88 | 2093,2093,18.0,13.0,"bilwi, puerto cabezas, nicaragua",spanish150,spanish,male,2094,nicaragua
89 | 2099,2099,51.0,25.0,"barcelona, spain",spanish152,spanish,male,2100,spain
90 | 2117,2117,21.0,10.0,"bogota, colombia",spanish153,spanish,male,2118,colombia
91 | 2126,2126,30.0,4.0,"montevideo, uruguay",spanish154,spanish,male,2127,uruguay
92 | 2129,2129,27.0,9.0,"lima, peru",spanish155,spanish,male,2130,peru
93 | 2148,2148,29.0,21.0,"san salvador, el salvador",spanish159,spanish,male,2149,el salvador
94 | 2149,2149,20.0,7.0,"bogota, colombia",spanish160,spanish,male,2150,colombia
95 | 2168,2168,28.0,5.0,"mexico city, mexico",spanish162,spanish,male,2169,mexico
96 |
--------------------------------------------------------------------------------
/code/mp3_getter.py:
--------------------------------------------------------------------------------
1 | import urllib
2 | import time
3 | import shutil
4 | from requests import get
5 | from bs4 import BeautifulSoup
6 | import pandas as pd
7 | import numpy as np
8 |
9 |
10 | # from the accent.gmu website, pass in list of languages to scrape mp3 files and save them to disk
def mp3getter(lst):
    """Download every numbered mp3 for each ``(language, count)`` entry.

    For an entry ``(language, count)`` the files
    ``http://accent.gmu.edu/soundtracks/<language><i>.mp3`` with
    ``i = 1 .. count`` are saved as ``<language><i>.mp3`` in the current
    working directory.  A failed download is retried after a 2 second pause,
    without limit.

    Parameters
    ----------
    lst : sequence of entries whose first item is the language name and whose
        second item is the number of recordings.
    """
    # Resolve the download function once, outside the retry loop.  Python 2
    # exposes it as urllib.urlretrieve; Python 3 moved it to urllib.request.
    # Looking it up inside the loop would turn a missing attribute into an
    # endless retry.
    fetch = getattr(urllib, 'urlretrieve', None)
    if fetch is None:
        from urllib.request import urlretrieve as fetch

    for entry in lst:
        language, count = entry[0], entry[1]
        for i in range(1, count + 1):
            url = "http://accent.gmu.edu/soundtracks/{0}{1}.mp3".format(language, i)
            target = '{0}{1}.mp3'.format(language, i)
            while True:
                try:
                    fetch(url, target)
                except Exception:
                    # Not a bare except: KeyboardInterrupt / SystemExit must
                    # be able to break out of this otherwise endless loop.
                    time.sleep(2)
                else:
                    break
21 |
22 | # from list of languages, return urls of each language landing page
def lang_pages(lst):
    """Return the browse-page url for every language in ``lst``, in order."""
    template = 'http://accent.gmu.edu/browse_language.php?function=find&language={}'
    return [template.format(lang) for lang in lst]
28 |
29 | #output:
30 | #
31 | # ['http://accent.gmu.edu/browse_language.php?function=find&language=amharic',
32 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=arabic',
33 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=bengali',
34 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=bulgarian',
35 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=cantonese',
36 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=dutch',
37 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=english',
38 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=farsi',
39 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=french',
40 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=german',
41 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=greek',
42 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=hindi',
43 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=italian',
44 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=japanese',
45 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=korean',
46 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=kurdish',
47 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=macedonian',
48 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=mandarin',
49 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=miskito',
50 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=nepali',
51 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=pashto',
52 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=polish',
53 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=portuguese',
54 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=punjabi',
55 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=romanian',
56 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=russian',
57 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=serbian',
58 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=spanish',
59 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=swedish',
60 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=tagalog',
61 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=thai',
62 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=turkish',
63 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=ukrainian',
64 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=urdu',
65 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=vietnamese']
66 |
67 | # from http://accent.gmu.edu/browse_language.php, return list of languages
def get_languages():
    """Scrape the language names listed on the accent.gmu.edu browse page.

    Collects the text of every ``<li>`` found inside each
    ``<ul class="languagelist">`` element, in document order.
    """
    response = get("http://accent.gmu.edu/browse_language.php")
    soup = BeautifulSoup(response.content, 'html.parser')
    return [item.text
            for listing in soup.findAll('ul', attrs={'class': 'languagelist'})
            for item in listing.findAll('li')]
78 |
79 | # from list of languages, return list of urls
def get_language_urls(lst):
    """Return the browse-page url for each language name in ``lst``.

    The name is appended by string concatenation, so every element must be a
    string.
    """
    base = 'http://accent.gmu.edu/browse_language.php?function=find&language='
    return [base + language for language in lst]
85 |
86 | # from language, get the number of speakers of that language
def get_num(language):
    """Return the number of speakers listed for ``language``.

    The language's browse page is fetched and the third whitespace-separated
    token of the first ``<h5>`` inside the first ``<div class="content">`` is
    parsed as an integer.

    Returns 0 when the count cannot be extracted: no content div, no ``<h5>``
    heading, a heading with fewer than three tokens, or a token that is not an
    integer.  Previously only the missing-heading case was handled and the
    others raised.
    """
    url = 'http://accent.gmu.edu/browse_language.php?function=find&language=' + language
    html = get(url)
    soup = BeautifulSoup(html.content, 'html.parser')
    content_blocks = soup.find_all('div', attrs={'class': 'content'})
    try:
        return int(content_blocks[0].find('h5').text.split()[2])
    except (AttributeError, IndexError, ValueError):
        return 0
97 |
98 | # from list of languages, return list of tuples (LANGUAGE, LANGUAGE_NUM_SPEAKERS) for mp3getter, ignoring languages
99 | # with 0 speakers
def get_formatted_languages(languages):
    """Pair each language with its speaker count, dropping those with none.

    Returns a list of ``(language, count)`` tuples in input order, in the
    form ``mp3getter`` expects.  ``get_num`` is called once per language.
    """
    counted = ((language, get_num(language)) for language in languages)
    return [(language, num) for language, num in counted if num != 0]
107 |
108 | # from each language whose url is contained in the above list, save the number of speakers of that language to a list
def get_nums(lst):
    """Return the speaker count parsed from each language page URL.

    Args:
        lst: iterable of browse-by-language page URLs.

    Returns:
        list: one int per URL, in order, taken from the third
        whitespace-separated token of the first ``<h5>`` in the first
        ``<div class="content">`` of that page.

    Unlike ``get_num``, nothing is caught here: a page without the expected
    structure raises.
    """
    def _count_for(url):
        page = BeautifulSoup(get(url).content, 'html.parser')
        header = page.find_all('div', attrs={'class': 'content'})[0].find('h5')
        return int(header.text.split()[2])

    return [_count_for(url) for url in lst]
117 |
def get_speaker_info(start, stop):
    '''
    Scrape the speaker detail page for every speaker id in range(start, stop).

    Inputs: two integers; ids start .. stop - 1 are fetched (stop is exclusive).
    Outputs: Pandas DataFrame with one row per id and the fields speakerid,
             filename, birthplace, native_language, age, sex, age_onset
             (age of English onset). Rows whose page could not be parsed keep
             their speakerid and hold '' in every other field.

    Side effect: the DataFrame is also written to 'speaker_info_<stop>.csv'
    in the current working directory.
    '''
    blank_fields = ('filename', 'birthplace', 'native_language', 'age', 'sex', 'age_onset')

    user_data = []
    for num in range(start, stop):
        # Start from an all-blank record so a failed parse needs no cleanup.
        info = {'speakerid': num}
        info.update((field, '') for field in blank_fields)

        url = "http://accent.gmu.edu/browse_language.php?function=detail&speakerid={}".format(num)
        html = get(url)
        soup = BeautifulSoup(html.content, 'html.parser')
        body = soup.find_all('div', attrs={'class': 'content'})
        try:
            filename = str(body[0].find('h5').text.split()[0])
            bio_items = soup.find_all('ul', attrs={'class': 'bio'})[0].find_all('li')
            age_sex = bio_items[3].text.split()
            parsed = {
                'filename': filename,
                # NOTE(review): the fixed [13:-6] slice presumably strips a
                # leading label and a trailing link text -- confirm against the site.
                'birthplace': str(bio_items[0].text)[13:-6],
                'native_language': str(bio_items[1].text.split()[2]),
                'age': float(age_sex[2].strip(',')),
                'sex': str(age_sex[3].strip()),
                'age_onset': float(bio_items[4].text.split()[4].strip()),
            }
        except Exception:
            # Best-effort scrape: any parsing failure (missing element, short
            # list, non-numeric age, ...) yields a blank row. `Exception`
            # rather than a bare except so Ctrl-C / SystemExit still propagate.
            parsed = {}
        # All-or-nothing: fields are only filled in when every one parsed.
        info.update(parsed)
        user_data.append(info)

    df = pd.DataFrame(user_data)
    df.to_csv('speaker_info_{}.csv'.format(stop))
    return df
151 |
152 | # copy files from one list of wav files to a specified location
def copy_files(lst, path):
    """Copy ``<name>.wav`` for every name in ``lst`` into directory ``path``.

    Args:
        lst: iterable of file names without the ``.wav`` extension; each
            source is resolved as ``'<name>.wav'`` (relative names are
            relative to the current working directory).
        path: destination directory; each file is written to
            ``'<path>/<name>.wav'`` via ``shutil.copy2``.
    """
    for stem in lst:
        source = '{}.wav'.format(stem)
        target = '{}/{}.wav'.format(path, stem)
        shutil.copy2(source, target)
156 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Accent Classification in Spoken English
2 |
3 | This repo contains the code, images and .csv files for my accent classification project. The goal of this project is to train a neural network to distinguish and classify the accents of English speakers from different geographic and linguistic backgrounds. The code section includes python files both for preprocessing of audio files into useful data objects and models of neural net classifiers.
4 |
5 |
6 | ## Table of Contents
7 | 1. [Motivation](#motivation)
8 | 2. [Dataset](#dataset)
9 | 3. [Signal Processing](#signal-processing)
10 | 4. [Features from MFCCs](#features-from-mfccs)
11 | 5. [Neural Network Models](#neural-network-models)
12 | * [1D Convolutional Neural Net](#1d-convolutional-neural-net)
13 | * [LSTM Recurrent Neural Net](#lstm-recurrent-neural-net)
14 | * [Results](#results)
15 | 6. [Future Directions](#future-directions)
16 |
17 | ## Motivation
18 |
19 | The motivation behind developing a model to recognize accents in spoken English is primarily twofold. First, if it is possible to determine a speaker's geographic location or native language simply by their accent, then it might be possible, for instance in a call center, to more efficiently route that person to a regional representative or to a speaker of an appropriate language. Secondly, accent recognition is simply a necessary precursor to automatic speech recognition (ASR), such as is found in Siri--to understand what a person is saying, there must be a model in place that expects how they are going to say it.
20 |
21 | The decision to use neural networks to tackle this problem, which has frequently been approached with other methods in the past, was made for the sake of novelty and because neural nets perform well on a variety of media classification tasks.
22 |
23 | ## Dataset
24 |
25 | All of the speech files used for this project come from the [Speech Accent Archive](http://accent.gmu.edu), a repository of spoken English hosted by George Mason University. Over 2000 speakers representing over 100 native languages read a common elicitation paragraph in English:
26 |
27 | >Please call Stella. Ask her to bring these things with her from the store: Six spoons of fresh snow peas, five thick slabs of blue cheese, and maybe a snack for her brother Bob. We also need a small plastic snake and a big toy frog for the kids. She can scoop these things into three red bags, and we will go meet her Wednesday at the train station.
28 |
29 |
30 | The common nature of the dataset makes it ideal for studying accent, being that the wording is provided and the recording quality is (nearly) uniform across all speakers.
31 |
32 |
33 |
34 | Figure 1: Example [speech accent archive](http://accent.gmu.edu) page for a speaker, including the audio file, a phonetic transcription of their reading, as well as biographical data.
35 |
36 | For each of the bigger languages (defined by me as having at least 10 speakers represented in the dataset), I scraped the site and saved the mp3 files to folders and the biographical data to [csv files](https://github.com/dwww2012/Accent-Classifier/tree/master/dataframes)--for manipulation using Pandas DataFrames.
37 |
38 | ## Signal Processing
39 |
40 | Having saved all the .mp3 files, I then converted them to .wav files, being that this is a more universal (and less compressed) format for audio processing. (Functions used in this process of audio retrieval and ordering can be found in my **code** directory [here](https://github.com/dwww2012/Accent-Classifier/tree/master/code/mp3_getter.py).)
41 |
42 | The .wav files themselves are routinely visualized as waveforms such as this:
43 |
44 |
45 | Figure 2: Waveform for the 'english1' file, showing speech power over time (roughly 20 seconds).
46 |
47 | Depicted here is the graph of a numerical representation of a 21 second audio file, corresponding essentially to energy (i.e. pressure) on the y-axis and time on the x-axis. Being that the sample rate is set at 44100 samples/sec, this gives us a 1-dimensional vector of nearly 1 million values in less than 30 seconds!
48 |
49 | Attempts were made to process that raw data, but passing a vector with on the order of 1 million features into a machine learning algorithm seemed intractable. One attempt was to downsample the audio (i.e., reduce its sampling rate). Arriving at a reasonable number of features, however, meant losing almost all recognizable speech signal, let alone accent information, contained in the file. Thus it was necessary to use a lower-dimensional but more informative feature space to represent the speech files.
50 |
51 | ## Features from MFCCs
52 |
53 | To featurize the audio files, I used mel frequency cepstral coefficients (MFCCs), which are a decades old tool for representing human speech as it is perceived. Quoting from [practical cryptography](http://practicalcryptography.com/miscellaneous/machine-learning/guide-mel-frequency-cepstral-coefficients-mfccs/),
54 | > The main point to understand about speech is that the sounds generated by a human are filtered by the shape of the vocal tract including tongue, teeth etc. This shape determines what sound comes out. If we can determine the shape accurately, this should give us an accurate representation of the phoneme being produced. The shape of the vocal tract manifests itself in the envelope of the short time power spectrum, and the job of MFCCs is to accurately represent this envelope.
55 |
56 | This is achieved through six steps:
57 | 1. Frame the signal into short frames.
58 | 2. For each frame calculate the periodogram estimate of the power spectrum.
59 | 3. Apply the mel filterbank to the power spectra, sum the energy in each filter.
60 | 4. Take the logarithm of all filterbank energies.
61 | 5. Take the DCT of the log filterbank energies.
62 | 6. Keep DCT coefficients 2-13, discard the rest.
63 |
64 |
65 |
66 | Figure 3: Schematic representation of the steps necessary to create a Mel frequency cepstral coefficient (MFCC) from an audio signal.
67 |
68 | The output of this process is a 13-dimensional vector, each dimension corresponding to a different band in human hearing.
69 |
70 |
71 |
72 | Figure 4: Visual representation of the 13 MFCCs for the 'english1' audio file over ~22s.
73 |
74 | In my data, since every recording had a different length, I had to normalize them all to a common standard so that the MFCCs had the same dimensions. I chose to trim/pad them all to ~30 seconds in length. Given that my MFCC window advances in steps of 0.01s, this yielded MFCCs of shape (2999, 13).
75 |
76 | Code necessary for the creation of these features and other pre-processing to feed into my model(s) can be found [here](https://github.com/dwww2012/Accent-Classifier/tree/master/code/testing.py).
77 |
78 | ## Neural Network Models
79 |
80 | The problem of machine learning accent recognition has traditionally been handled with some combination of support vector machines (SVMs), hidden Markov models (HMMs), Gaussian mixture models (GMMs) and dynamic time warping (DTW). These tools allow machines to arrive at the most important features of speech, controlling for inherent temporal variation.
81 |
82 | I found the prospect of a neural network appealing in that it can train itself to learn the important features without needing to control explicitly for time--that is, the difference in speaking pace between two speakers needn't necessarily be controlled for (as with DTW), because a convolutional or recurrent net will update its weights across a sequence of any length.
83 |
84 | ### 1D Convolutional Neural Net
85 |
86 | Two-dimensional convolutional neural networks (CNNs) are increasingly used as the go-to machine learning algorithm for computer vision--i.e., recognizing an image. Less used but no less powerful--in their domain of application--are 1D CNNs, which are especially suited for text or time-series data, such as mine. The key underlying trait of convolutional nets is that they are spatially (or temporally) invariant, and thus inherent variation in speech patterns is controlled for by the convolutions and pooling.
87 |
88 | My [CNN model](https://github.com/dwww2012/Accent-Classifier/blob/master/code/conv_1d_model_aws.py) was built on top of the [Keras](http://keras.io) deep learning Python package and largely based on a [text processing example](https://github.com/fchollet/keras/blob/master/examples/imdb_cnn.py). I performed many adjustments on my net depending on how many and which accents I was modeling. In its current form, it features six convolutional layers with batch normalization after each to control for weights veering too far from (0,1) and MaxPooling after all but the first convolution.
89 |
90 | The model was trained on (generally) 85% of my data, using either full (2999, 13)-dimensional MFCCs or else split (5, 999, 13)-dimensional MFCCs, where each 30 second file is split into 5 overlapping 10 second files. This latter split was used simply to give me 5 times more training samples in the case of small data.
91 |
92 | ### LSTM Recurrent Neural Net
93 |
94 | Another, potentially more promising, variety of neural network I was eager to try was the recurrent neural net (RNN). Specifically, I implemented a long short-term memory (LSTM) RNN. RNNs generally are nets wherein the weights are updated *between* hidden cells in the same layer, and these updates occur recurrently for passed in sequences (such as time-varying speech signal data!). An LSTM is a specific version of RNN that employs *memory cells* to preserve data throughout the sequence of unknown duration, such as ours.
95 |
96 | Again, my [LSTM RNN model](https://github.com/dwww2012/Accent-Classifier/blob/master/code/rnn_example.py) was built on top of Keras. It features three LSTM layers and again takes (n, 999, 13)-dimensional MFCCs as its inputs. The model can be run as either 'stateful' or not; when stateful, the last state for each sample at index i in a batch is used as the initial state for the sample at index i in the following batch--in this case, the full 3-dimensional batch input shape (including the batch size) must be passed explicitly to the model's first layer.
97 |
98 | ### Results
99 |
100 | After training both the CNN and RNN on the top 5 most common languages--those being English, Spanish, Arabic, Mandarin, and French--my best results were ~86% precision for US English classification. Other languages lagged behind, with Arabic, Spanish and Mandarin in the 65-75% range and French performing especially poorly. Focusing only on the top three languages, my RNN outperformed my CNN, yielding **accuracy and recall of 75%+ for English, Spanish and Arabic**.
101 |
102 | ## Future Directions
103 |
104 | Going forward, I would like to improve my net(s) so that they can distinguish more languages more reliably. That most likely will entail building deeper nets. Moreover, it is likely that I will simply need more and better data to do that. My training sets are on the order of hundreds of observations, which are very small numbers for a neural network. Moreover, many speakers have only faint accents. To train a model well, I would need to manually select only those speakers with a strong accent.
105 |
106 | My ultimate goal is to build a live web app that takes 'wild' (i.e., unstructured) speech audio and makes a prediction of the age, gender and geographic location/language of the speaker.
107 |
--------------------------------------------------------------------------------
/dataframes/df_usa_english_male.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country,state
2 | 60,60,42.0,0.0,"pittsburgh, pennsylvania, usa",english1,english,male,61,usa,pennsylvania
3 | 64,64,22.0,0.0,"torrington, connecticut, usa",english102,english,male,65,usa,connecticut
4 | 65,65,21.0,0.0,"staten island, new york, usa",english103,english,male,66,usa,new york
5 | 72,72,21.0,0.0,"wise, virginia, usa",english16,english,male,73,usa,virginia
6 | 73,73,79.0,0.0,"darwin va, virginia, usa",english17,english,male,74,usa,virginia
7 | 75,75,53.0,0.0,"louisville, kentucky, usa",english19,english,male,76,usa,kentucky
8 | 80,80,43.0,0.0,"englewood, tennessee, usa",english23,english,male,81,usa,tennessee
9 | 82,82,57.0,0.0,"atlanta, georgia, usa",english25,english,male,83,usa,georgia
10 | 83,83,71.0,0.0,"smith island, maryland, usa",english26,english,male,84,usa,maryland
11 | 91,91,21.0,0.0,"san diego, california, usa",english33,english,male,92,usa,california
12 | 94,94,60.0,0.0,"arcadia, wisconsin, usa",english36,english,male,95,usa,wisconsin
13 | 104,104,52.0,0.0,"pine bluff, arkansas, usa",english45,english,male,105,usa,arkansas
14 | 105,105,73.0,0.0,"creswell, north carolina, usa",english46,english,male,106,usa,north carolina
15 | 108,108,43.0,0.0,"castro valley, california, usa",english49,english,male,109,usa,california
16 | 109,109,62.0,0.0,"fairfax, virginia, usa",english5,english,male,110,usa,virginia
17 | 111,111,67.0,0.0,"detroit, michigan, usa",english51,english,male,112,usa,michigan
18 | 113,113,63.0,0.0,"syracuse, new york, usa",english53,english,male,114,usa,new york
19 | 119,119,30.0,0.0,"columbus, ohio, usa",english59,english,male,120,usa,ohio
20 | 121,121,18.0,0.0,"chesapeake, virginia, usa",english60,english,male,122,usa,virginia
21 | 123,123,30.0,0.0,"west jordan, utah, usa",english62,english,male,124,usa,utah
22 | 124,124,21.0,0.0,"lewisville, texas, usa",english63,english,male,125,usa,texas
23 | 126,126,47.0,0.0,"lewistown, pennsylvania, usa",english65,english,male,127,usa,pennsylvania
24 | 127,127,19.0,0.0,"baltimore, maryland, usa",english66,english,male,128,usa,maryland
25 | 128,128,37.0,0.0,"princeton, indiana, usa",english67,english,male,129,usa,indiana
26 | 129,129,52.0,0.0,"new york, new york, usa",english68,english,male,130,usa,new york
27 | 131,131,52.0,0.0,"macon, mississippi, usa",english7,english,male,132,usa,mississippi
28 | 132,132,21.0,0.0,"beaumont, texas, usa",english70,english,male,133,usa,texas
29 | 133,133,20.0,0.0,"west covina, california, usa",english71,english,male,134,usa,california
30 | 136,136,21.0,0.0,"wichita, kansas, usa",english74,english,male,137,usa,kansas
31 | 137,137,32.0,0.0,"idaho falls, idaho, usa",english75,english,male,138,usa,idaho
32 | 141,141,73.0,0.0,"boston, massachusetts, usa",english79,english,male,142,usa,massachusetts
33 | 144,144,30.0,0.0,"st. paul, minnesota, usa",english81,english,male,145,usa,minnesota
34 | 149,149,50.0,0.0,"charleston, south carolina, usa",english86,english,male,150,usa,south carolina
35 | 151,151,18.0,0.0,"grand rapids, michigan, usa",english88,english,male,152,usa,michigan
36 | 152,152,18.0,0.0,"kingston ma, massachusetts, usa",english89,english,male,153,usa,massachusetts
37 | 154,154,53.0,0.0,"pittsburgh, pennsylvania, usa",english90,english,male,155,usa,pennsylvania
38 | 159,159,18.0,0.0,"mishawaka, indiana, usa",english95,english,male,160,usa,indiana
39 | 160,160,31.0,0.0,"point pleasant, new jersey, usa",english96,english,male,161,usa,new jersey
40 | 161,161,42.0,0.0,"wilmington nc, north carolina, usa",english97,english,male,162,usa,north carolina
41 | 162,162,22.0,0.0,"spokane, washington, usa",english98,english,male,163,usa,washington
42 | 415,415,19.0,0.0,"chicago, illinois, usa",english107,english,male,416,usa,illinois
43 | 443,443,56.0,0.0,"atlanta, georgia, usa",english116,english,male,444,usa,georgia
44 | 445,445,19.0,0.0,"akron, ohio, usa",english118,english,male,446,usa,ohio
45 | 464,464,21.0,0.0,"waconia, minnesota, usa",english121,english,male,465,usa,minnesota
46 | 479,479,38.0,0.0,"brooklyn, new york, usa",english124,english,male,480,usa,new york
47 | 488,488,39.0,0.0,"new britain, connecticut, usa",english127,english,male,489,usa,connecticut
48 | 496,496,23.0,0.0,"west palm beach, florida, usa",english131,english,male,497,usa,florida
49 | 506,506,22.0,0.0,"orange beach, alabama, usa",english135,english,male,507,usa,alabama
50 | 508,508,26.0,0.0,"charleston, west virginia, usa",english137,english,male,509,usa,west virginia
51 | 510,510,20.0,0.0,"providence, rhode island, usa",english139,english,male,511,usa,rhode island
52 | 516,516,19.0,0.0,"avon, new york, usa",english142,english,male,517,usa,new york
53 | 517,517,42.0,0.0,"minneapolis, minnesota, usa",english143,english,male,518,usa,minnesota
54 | 521,521,22.0,0.0,"blytheville, arkansas, usa",english146,english,male,522,usa,arkansas
55 | 525,525,18.0,0.0,"lawrenceville, georgia, usa",english149,english,male,526,usa,georgia
56 | 526,526,31.0,0.0,"brownsville, kentucky, usa",english150,english,male,527,usa,kentucky
57 | 527,527,18.0,0.0,"baltimore, maryland, usa",english151,english,male,528,usa,maryland
58 | 534,534,35.0,0.0,"oakland, california, usa",english155,english,male,535,usa,california
59 | 537,537,35.0,0.0,"fort worth, texas, usa",english157,english,male,538,usa,texas
60 | 546,546,41.0,0.0,"fairview park, ohio, usa",english163,english,male,547,usa,ohio
61 | 550,550,18.0,0.0,"glenside, pennsylvania, usa",english166,english,male,551,usa,pennsylvania
62 | 551,551,43.0,0.0,"lakeview, michigan, usa",english167,english,male,552,usa,michigan
63 | 553,553,40.0,0.0,"oceanside, california, usa",english168,english,male,554,usa,california
64 | 554,554,18.0,0.0,"cleveland, mississippi, usa",english169,english,male,555,usa,mississippi
65 | 562,562,21.0,0.0,"st. louis, missouri, usa",english171,english,male,563,usa,missouri
66 | 566,566,19.0,0.0,"libertyville, illinois, usa",english173,english,male,567,usa,illinois
67 | 571,571,24.0,0.0,"seattle, washington, usa",english175,english,male,572,usa,washington
68 | 582,582,30.0,0.0,"wilkes-barre, pennsylvania, usa",english178,english,male,583,usa,pennsylvania
69 | 583,583,57.0,0.0,"huron, south dakota, usa",english179,english,male,584,usa,south dakota
70 | 584,584,20.0,0.0,"newport, rhode island, usa",english180,english,male,585,usa,rhode island
71 | 585,585,68.0,0.0,"new eagle, pennsylvania, usa",english181,english,male,586,usa,pennsylvania
72 | 589,589,50.0,0.0,"troy, new york, usa",english182,english,male,590,usa,new york
73 | 616,616,19.0,0.0,"grand forks, north dakota, usa",english189,english,male,617,usa,north dakota
74 | 617,617,27.0,0.0,"oak park, illinois, usa",english190,english,male,618,usa,illinois
75 | 630,630,19.0,0.0,"las cruces, new mexico, usa",english191,english,male,631,usa,new mexico
76 | 636,636,32.0,0.0,"honolulu, hawaii, usa",english193,english,male,637,usa,hawaii
77 | 661,661,22.0,0.0,"iowa city, iowa, usa",english197,english,male,662,usa,iowa
78 | 662,662,56.0,0.0,"washington, district of columbia, usa",english198,english,male,663,usa,district of columbia
79 | 663,663,25.0,0.0,"bay shore, new york, usa",english199,english,male,664,usa,new york
80 | 672,672,19.0,0.0,"crisfield, maryland, usa",english202,english,male,673,usa,maryland
81 | 677,677,53.0,0.0,"chicago, illinois, usa",english204,english,male,678,usa,illinois
82 | 694,694,46.0,0.0,"washington, dc, usa",english208,english,male,695,usa,dc
83 | 719,719,30.0,0.0,"worcester, massachusetts, usa",english211,english,male,720,usa,massachusetts
84 | 728,728,24.0,0.0,"spartanburg, south carolina, usa",english212,english,male,729,usa,south carolina
85 | 733,733,38.0,0.0,"dodge city, kansas, usa",english213,english,male,734,usa,kansas
86 | 736,736,26.0,0.0,"myrtle beach, south carolina, usa",english214,english,male,737,usa,south carolina
87 | 766,766,18.0,0.0,"erie, pennsylvania, usa",english223,english,male,767,usa,pennsylvania
88 | 774,774,25.0,0.0,"burlington, vermont, usa",english229,english,male,775,usa,vermont
89 | 790,790,22.0,0.0,"tampa, florida, usa",english231,english,male,791,usa,florida
90 | 793,793,49.0,0.0,"bloomington, indiana, usa",english234,english,male,794,usa,indiana
91 | 798,798,44.0,0.0,"st. charles, illinois, usa",english236,english,male,799,usa,illinois
92 | 820,820,23.0,0.0,"san francisco, california, usa",english244,english,male,821,usa,california
93 | 823,823,25.0,0.0,"pittsburgh, pennsylvania, usa",english245,english,male,824,usa,pennsylvania
94 | 824,824,32.0,0.0,"los angeles, california, usa",english246,english,male,825,usa,california
95 | 854,854,21.0,0.0,"mt. kisco, new york, usa",english251,english,male,855,usa,new york
96 | 857,857,39.0,0.0,"eugene, oregon, usa",english254,english,male,858,usa,oregon
97 | 860,860,23.0,0.0,"laurinburg, north carolina, usa",english256,english,male,861,usa,north carolina
98 | 862,862,24.0,0.0,"san diego, california, usa",english257,english,male,863,usa,california
99 | 875,875,60.0,0.0,"naylor, maryland, usa",english262,english,male,876,usa,maryland
100 | 883,883,21.0,0.0,"oak forest, illinois, usa",english263,english,male,884,usa,illinois
101 | 888,888,21.0,0.0,"freemont, california, usa",english264,english,male,889,usa,california
102 | 889,889,31.0,0.0,"vancouver wa, washington, usa",english265,english,male,890,usa,washington
103 | 898,898,18.0,0.0,"san diego, california, usa",english266,english,male,899,usa,california
104 | 904,904,20.0,0.0,"dallas, texas, usa",english272,english,male,905,usa,texas
105 | 906,906,46.0,0.0,"blue bell, pennsylvania, usa",english273,english,male,907,usa,pennsylvania
106 | 936,936,23.0,0.0,"ramsey mn, minnesota, usa",english281,english,male,937,usa,minnesota
107 | 938,938,20.0,0.0,"tampa, florida, usa",english282,english,male,939,usa,florida
108 | 950,950,18.0,0.0,"pelham, new york, usa",english283,english,male,951,usa,new york
109 | 951,951,33.0,0.0,"gainesville, florida, usa",english284,english,male,952,usa,florida
110 | 985,985,20.0,0.0,"duluth, georgia, usa",english290,english,male,986,usa,georgia
111 | 1050,1050,38.0,0.0,"kansas city, missouri, usa",english292,english,male,1051,usa,missouri
112 | 1070,1070,19.0,0.0,"lindenhurst, new york, usa",english297,english,male,1071,usa,new york
113 | 1099,1099,48.0,0.0,"alexandria, va, usa",english307,english,male,1100,usa,va
114 | 1118,1118,28.0,0.0,"silver spring, maryland, usa",english313,english,male,1119,usa,maryland
115 | 1120,1120,25.0,0.0,"los angeles, california, usa",english315,english,male,1121,usa,california
116 | 1121,1121,33.0,0.0,"sacramento, california, usa",english316,english,male,1122,usa,california
117 | 1131,1131,26.0,0.0,"burlington, vermont, usa",english317,english,male,1132,usa,vermont
118 | 1162,1162,52.0,0.0,"brooklyn, new york, usa",english321,english,male,1163,usa,new york
119 | 1175,1175,32.0,0.0,"reading, pennsylvania, usa",english325,english,male,1176,usa,pennsylvania
120 | 1187,1187,53.0,0.0,"fairborn, ohio, usa",english326,english,male,1188,usa,ohio
121 | 1204,1204,27.0,0.0,"dunedin, florida, usa",english327,english,male,1205,usa,florida
122 | 1215,1215,6.0,0.0,"washington, district of columbia, usa",english335,english,male,1216,usa,district of columbia
123 | 1219,1219,54.0,0.0,"chicago, illinois, usa",english339,english,male,1220,usa,illinois
124 | 1221,1221,19.0,0.0,"charleston, west virginia, usa",english340,english,male,1222,usa,west virginia
125 | 1224,1224,33.0,0.0,"kansas city, missouri, usa",english342,english,male,1225,usa,missouri
126 | 1225,1225,23.0,0.0,"portland, maine, usa",english343,english,male,1226,usa,maine
127 | 1232,1232,20.0,0.0,"kansas city, missouri, usa",english345,english,male,1233,usa,missouri
128 | 1234,1234,60.0,0.0,"east hartford, connecticut, usa",english346,english,male,1235,usa,connecticut
129 | 1241,1241,37.0,0.0,"carthage, missouri, usa",english351,english,male,1242,usa,missouri
130 | 1255,1255,19.0,0.0,"los angeles, california, usa",english355,english,male,1256,usa,california
131 | 1257,1257,24.0,0.0,"alto, georgia, usa",english357,english,male,1258,usa,georgia
132 | 1296,1296,27.0,0.0,"mcminnville, oregon, usa",english369,english,male,1297,usa,oregon
133 | 1306,1306,80.0,0.0,"christiansburg, virginia, usa",english372,english,male,1307,usa,virginia
134 | 1307,1307,22.0,0.0,"russellville, kentucky, usa",english373,english,male,1308,usa,kentucky
135 | 1311,1311,28.0,0.0,"manchester, connecticut, usa",english375,english,male,1312,usa,connecticut
136 | 1314,1314,46.0,0.0,"pasadena, california, usa",english376,english,male,1315,usa,california
137 | 1320,1320,43.0,0.0,"summit, new jersey, usa",english379,english,male,1321,usa,new jersey
138 | 1323,1323,85.0,0.0,"pike county, kentucky, usa",english381,english,male,1324,usa,kentucky
139 | 1324,1324,34.0,0.0,"arlington, virginia, usa",english382,english,male,1325,usa,virginia
140 | 1326,1326,57.0,0.0,"williamson, west virginia, usa",english384,english,male,1327,usa,west virginia
141 | 1333,1333,74.0,0.0,"milwaukee, wisconsin, usa",english390,english,male,1334,usa,wisconsin
142 | 1335,1335,62.0,0.0,"warrenton, virginia, usa",english392,english,male,1336,usa,virginia
143 | 1339,1339,39.0,0.0,"alexandria, virginia, usa",english395,english,male,1340,usa,virginia
144 | 1360,1360,64.0,0.0,"lynwood, california, usa",english408,english,male,1361,usa,california
145 | 1374,1374,32.0,0.0,"woonsocket, rhode island, usa",english415,english,male,1375,usa,rhode island
146 | 1384,1384,27.0,0.0,"youngstown, ohio, usa",english419,english,male,1385,usa,ohio
147 | 1389,1389,31.0,0.0,"rochester, new york, usa",english422,english,male,1390,usa,new york
148 | 1408,1408,75.0,0.0,"danville, virginia, usa",english424,english,male,1409,usa,virginia
149 | 1409,1409,31.0,0.0,"danville, virginia, usa",english425,english,male,1410,usa,virginia
150 | 1468,1468,24.0,0.0,"kansas city, kansas, usa",english434,english,male,1469,usa,kansas
151 | 1476,1476,21.0,0.0,"cleveland, ohio, usa",english437,english,male,1477,usa,ohio
152 | 1527,1527,23.0,0.0,"salisbury mills, new york, usa",english442,english,male,1528,usa,new york
153 | 1529,1529,31.0,0.0,"fort collins, colorado, usa",english443,english,male,1530,usa,colorado
154 | 1530,1530,24.0,0.0,"anniston, alabama, usa",english444,english,male,1531,usa,alabama
155 | 1531,1531,60.0,0.0,"san francisco, california, usa",english445,english,male,1532,usa,california
156 | 1533,1533,21.0,0.0,"hartford, connecticut, usa",english447,english,male,1534,usa,connecticut
157 | 1537,1537,27.0,0.0,"barton, vermont, usa",english449,english,male,1538,usa,vermont
158 | 1545,1545,44.0,0.0,"hazlehurst, georgia, usa",english451,english,male,1546,usa,georgia
159 | 1554,1554,43.0,0.0,"syracuse, new york, usa",english457,english,male,1555,usa,new york
160 | 1559,1559,19.0,0.0,"charlotte, north carolina, usa",english459,english,male,1560,usa,north carolina
161 | 1563,1563,20.0,0.0,"paducah, kentucky, usa",english462,english,male,1564,usa,kentucky
162 | 1636,1636,18.0,0.0,"portland, maine, usa",english466,english,male,1637,usa,maine
163 | 1639,1639,30.0,0.0,"myrtle beach, south carolina, usa",english468,english,male,1640,usa,south carolina
164 | 1640,1640,22.0,0.0,"knoxville, tennessee, usa",english469,english,male,1641,usa,tennessee
165 | 1664,1664,58.0,0.0,"winston-salem, north carolina, usa",english480,english,male,1665,usa,north carolina
166 | 1667,1667,19.0,0.0,"boston, massachusetts, usa",english483,english,male,1668,usa,massachusetts
167 | 1673,1673,20.0,0.0,"cromwell, connecticut, usa",english485,english,male,1674,usa,connecticut
168 | 1717,1717,20.0,0.0,"miami, florida, usa",english493,english,male,1718,usa,florida
169 | 1718,1718,27.5,0.0,"findlay, ohio, usa",english494,english,male,1719,usa,ohio
170 | 1719,1719,21.0,0.0,"kirkland, washington, usa",english495,english,male,1720,usa,washington
171 | 1725,1725,22.0,0.0,"cleveland, ohio, usa",english497,english,male,1726,usa,ohio
172 | 1785,1785,25.0,0.0,"west palm beach, florida, usa",english504,english,male,1786,usa,florida
173 | 1871,1871,57.0,0.0,"youngstown, ohio, usa",english514,english,male,1872,usa,ohio
174 | 1875,1875,33.0,0.0,"colorado springs, colorado, usa",english517,english,male,1876,usa,colorado
175 | 1889,1889,19.0,0.0,"downers grove, illinois, usa",english524,english,male,1890,usa,illinois
176 | 1894,1894,20.0,0.0,"woodbridge, virginia, usa",english526,english,male,1895,usa,virginia
177 | 1969,1969,31.0,0.0,"hanover, new hampshire, usa",english544,english,male,1970,usa,new hampshire
178 | 1994,1994,56.0,0.0,"tangier island, virginia, usa",english545,english,male,1995,usa,virginia
179 | 1996,1996,63.0,0.0,"tangier island, virginia, usa",english547,english,male,1997,usa,virginia
180 | 2027,2027,82.0,0.0,"fredericksburg, virginia, usa",english550,english,male,2028,usa,virginia
181 | 2045,2045,21.0,0.0,"nashville, tennessee, usa",english553,english,male,2046,usa,tennessee
182 | 2048,2048,90.0,0.0,"brooklyn, new york, usa",english555,english,male,2049,usa,new york
183 | 2054,2054,23.0,0.0,"queens, new york, usa",english558,english,male,2055,usa,new york
184 | 2073,2073,32.0,0.0,"manassas, virginia, usa",english563,english,male,2074,usa,virginia
185 | 2076,2076,52.0,0.0,"casper, wyoming, usa",english564,english,male,2077,usa,wyoming
186 | 2100,2100,24.0,0.0,"new york, new york, usa",english567,english,male,2101,usa,new york
187 | 2112,2112,39.0,0.0,"washington, dc, usa",english571,english,male,2113,usa,dc
188 | 2120,2120,51.0,0.0,"fort worth, texas, usa",english572,english,male,2121,usa,texas
189 | 2123,2123,46.0,0.0,"painesville, ohio, usa",english573,english,male,2124,usa,ohio
190 | 2164,2164,24.0,0.0,"great falls, virginia, usa",english575,english,male,2165,usa,virginia
191 |
--------------------------------------------------------------------------------
/dataframes/df_usa_english_female.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country,state
2 | 61,61,35.0,0.0,"davenport, iowa, usa",english10,english,female,62,usa,iowa
3 | 62,62,23.0,0.0,"miami, florida, usa",english100,english,female,63,usa,florida
4 | 66,66,18.0,0.0,"youngstown, ohio, usa",english104,english,female,67,usa,ohio
5 | 71,71,7.0,0.0,"norton, virginia, usa",english15,english,female,72,usa,virginia
6 | 74,74,39.0,0.0,"dickenson county, virginia, usa",english18,english,female,75,usa,virginia
7 | 77,77,27.0,0.0,"mt. holly, north carolina, usa",english20,english,female,78,usa,north carolina
8 | 78,78,37.0,0.0,"boston, massachusetts, usa",english21,english,female,79,usa,massachusetts
9 | 84,84,37.0,0.0,"smith island, maryland, usa",english27,english,female,85,usa,maryland
10 | 90,90,50.0,0.0,"fresno, california, usa",english32,english,female,91,usa,california
11 | 92,92,18.0,0.0,"ronkonkoma, new york, usa",english34,english,female,93,usa,new york
12 | 93,93,60.0,0.0,"milwaukee, wisconsin, usa",english35,english,female,94,usa,wisconsin
13 | 97,97,59.0,0.0,"new orleans, louisiana, usa",english39,english,female,98,usa,louisiana
14 | 103,103,63.0,0.0,"winston salem, north carolina, usa",english44,english,female,104,usa,north carolina
15 | 106,106,76.0,0.0,"plantersville, arkansas, usa",english47,english,female,107,usa,arkansas
16 | 110,110,29.0,0.0,"baltic, south dakota, usa",english50,english,female,111,usa,south dakota
17 | 114,114,40.0,0.0,"new albany, indiana, usa",english54,english,female,115,usa,indiana
18 | 115,115,53.0,0.0,"st. louis, missouri, usa",english55,english,female,116,usa,missouri
19 | 120,120,45.0,0.0,"brooklyn, new york, usa",english6,english,female,121,usa,new york
20 | 138,138,18.0,0.0,"boston, massachusetts, usa",english76,english,female,139,usa,massachusetts
21 | 140,140,20.0,0.0,"caribou, maine, usa",english78,english,female,141,usa,maine
22 | 145,145,50.0,0.0,"chicago, illinois, usa",english82,english,female,146,usa,illinois
23 | 146,146,42.0,0.0,"winnfield, louisiana, usa",english83,english,female,147,usa,louisiana
24 | 153,153,48.0,0.0,"carthage, texas, usa",english9,english,female,154,usa,texas
25 | 156,156,22.0,0.0,"wisconsin rapids, wisconsin, usa",english92,english,female,157,usa,wisconsin
26 | 157,157,22.0,0.0,"mamou, louisiana, usa",english93,english,female,158,usa,louisiana
27 | 158,158,55.0,0.0,"pensacola, florida, usa",english94,english,female,159,usa,florida
28 | 163,163,52.0,0.0,"pittsburgh, pennsylvania, usa",english99,english,female,164,usa,pennsylvania
29 | 407,407,39.0,0.0,"los angeles, california, usa",english106,english,female,408,usa,california
30 | 419,419,26.0,0.0,"concord, new hampshire, usa",english109,english,female,420,usa,new hampshire
31 | 441,441,38.0,0.0,"birmingham 2, alabama, usa",english114,english,female,442,usa,alabama
32 | 444,444,60.0,0.0,"elmore, alabama, usa",english117,english,female,445,usa,alabama
33 | 468,468,28.0,0.0,"riverside, california, usa",english123,english,female,469,usa,california
34 | 486,486,34.0,0.0,"cincinnati, ohio, usa",english126,english,female,487,usa,ohio
35 | 489,489,20.0,0.0,"miami, florida, usa",english128,english,female,490,usa,florida
36 | 503,503,36.0,0.0,"norwich, new york, usa",english133,english,female,504,usa,new york
37 | 509,509,25.0,0.0,"palmer, alaska, usa",english138,english,female,510,usa,alaska
38 | 522,522,40.0,0.0,"merced, california, usa",english147,english,female,523,usa,california
39 | 539,539,18.0,0.0,"washington, district of columbia, usa",english158,english,female,540,usa,district of columbia
40 | 541,541,21.0,0.0,"redwood falls, minnesota, usa",english160,english,female,542,usa,minnesota
41 | 545,545,75.0,0.0,"wakefield, ohio, usa",english161,english,female,546,usa,ohio
42 | 547,547,41.0,0.0,"delaware, ohio, usa",english162,english,female,548,usa,ohio
43 | 549,549,43.0,0.0,"detroit, michigan, usa",english165,english,female,550,usa,michigan
44 | 555,555,50.0,0.0,"belmont, mississippi, usa",english170,english,female,556,usa,mississippi
45 | 572,572,18.0,0.0,"elizabeth city, north carolina, usa",english176,english,female,573,usa,north carolina
46 | 573,573,23.0,0.0,"new orleans, louisiana, usa",english177,english,female,574,usa,louisiana
47 | 596,596,18.0,0.0,"hillsboro, oregon, usa",english184,english,female,597,usa,oregon
48 | 604,604,39.0,0.0,"gadsden, alabama, usa",english186,english,female,605,usa,alabama
49 | 605,605,42.0,0.0,"algona, iowa, usa",english187,english,female,606,usa,iowa
50 | 635,635,27.0,0.0,"east lansing, michigan, usa",english192,english,female,636,usa,michigan
51 | 638,638,21.0,0.0,"delavan, wisconsin, usa",english195,english,female,639,usa,wisconsin
52 | 666,666,22.0,0.0,"berkeley, california, usa",english200,english,female,667,usa,california
53 | 667,667,26.0,0.0,"warren, michigan, usa",english201,english,female,668,usa,michigan
54 | 673,673,63.0,0.0,"boston, massachusetts, usa",english203,english,female,674,usa,massachusetts
55 | 678,678,45.0,0.0,"blue earth, minnesota, usa",english205,english,female,679,usa,minnesota
56 | 683,683,23.0,0.0,"wilkes-barre, pennsylvania, usa",english207,english,female,684,usa,pennsylvania
57 | 738,738,48.0,0.0,"chicago, illinois, usa",english216,english,female,739,usa,illinois
58 | 746,746,27.0,0.0,"abingdon, virginia, usa",english218,english,female,747,usa,virginia
59 | 747,747,38.0,0.0,"oakland, california, usa",english219,english,female,748,usa,california
60 | 748,748,18.0,0.0,"anaheim, california, usa",english220,english,female,749,usa,california
61 | 763,763,18.0,0.0,"auburn, indiana, usa",english222,english,female,764,usa,indiana
62 | 794,794,52.0,0.0,"richmond, virginia, usa",english235,english,female,795,usa,virginia
63 | 797,797,32.0,0.0,"metairie, louisiana, usa",english232,english,female,798,usa,louisiana
64 | 800,800,22.0,0.0,"jeffersonville, ohio, usa",english237,english,female,801,usa,ohio
65 | 817,817,77.0,0.0,"laurel, mississippi, usa",english242,english,female,818,usa,mississippi
66 | 839,839,18.0,0.0,"philadelphia, pennsylvania, usa",english248,english,female,840,usa,pennsylvania
67 | 849,849,29.0,0.0,"boston, massachusetts, usa",english249,english,female,850,usa,massachusetts
68 | 855,855,25.0,0.0,"san jose ca, california, usa",english252,english,female,856,usa,california
69 | 870,870,18.0,0.0,"phoenix, arizona, usa",english260,english,female,871,usa,arizona
70 | 874,874,59.0,0.0,"forestville, maryland, usa",english261,english,female,875,usa,maryland
71 | 911,911,20.0,0.0,"clifton, new jersey, usa",english275,english,female,912,usa,new jersey
72 | 917,917,50.0,0.0,"chattanooga, tennessee, usa",english276,english,female,918,usa,tennessee
73 | 918,918,48.0,0.0,"hudson, new york, usa",english277,english,female,919,usa,new york
74 | 920,920,18.0,0.0,"augusta, georgia, usa",english278,english,female,921,usa,georgia
75 | 976,976,21.0,0.0,"beaumont, texas, usa",english286,english,female,977,usa,texas
76 | 981,981,26.0,0.0,"st. louis, missouri, usa",english288,english,female,982,usa,missouri
77 | 982,982,19.0,0.0,"fairfax, virginia, usa",english289,english,female,983,usa,virginia
78 | 1051,1051,22.0,0.0,"billings, montana, usa",english293,english,female,1052,usa,montana
79 | 1085,1085,45.0,0.0,"anaheim, california, usa",english303,english,female,1086,usa,california
80 | 1109,1109,38.0,0.0,"detroit, michigan, usa",english311,english,female,1110,usa,michigan
81 | 1119,1119,26.0,0.0,"memphis, tennessee, usa",english314,english,female,1120,usa,tennessee
82 | 1205,1205,23.0,0.0,"hollywood, florida, usa",english328,english,female,1206,usa,florida
83 | 1206,1206,21.0,0.0,"boise, idaho, usa",english329,english,female,1207,usa,idaho
84 | 1207,1207,18.0,0.0,"augusta, georgia, usa",english330,english,female,1208,usa,georgia
85 | 1211,1211,20.0,0.0,"baltimore, maryland, usa",english332,english,female,1212,usa,maryland
86 | 1213,1213,32.0,0.0,"spokane, washington, usa",english333,english,female,1214,usa,washington
87 | 1216,1216,46.0,0.0,"baltimore, maryland, usa",english336,english,female,1217,usa,maryland
88 | 1217,1217,84.0,0.0,"jersey city, new jersey, usa",english337,english,female,1218,usa,new jersey
89 | 1223,1223,23.0,0.0,"new york, new york, usa",english341,english,female,1224,usa,new york
90 | 1231,1231,19.0,0.0,"lumberton, north carolina, usa",english344,english,female,1232,usa,north carolina
91 | 1235,1235,22.0,0.0,"burnsville, minnesota, usa",english347,english,female,1236,usa,minnesota
92 | 1238,1238,24.0,0.0,"warrenton, virginia, usa",english349,english,female,1239,usa,virginia
93 | 1276,1276,19.0,0.0,"erie, pennsylvania, usa",english360,english,female,1277,usa,pennsylvania
94 | 1279,1279,20.0,0.0,"burnsville, minnesota, usa",english361,english,female,1280,usa,minnesota
95 | 1302,1302,27.0,0.0,"colorado springs, colorado, usa",english371,english,female,1303,usa,colorado
96 | 1318,1318,32.0,0.0,"trenton, michigan, usa",english377,english,female,1319,usa,michigan
97 | 1319,1319,38.0,0.0,"silver spring, maryland, usa",english378,english,female,1320,usa,maryland
98 | 1322,1322,34.0,0.0,"roanoke, virginia, usa",english380,english,female,1323,usa,virginia
99 | 1325,1325,32.0,0.0,"washington, district of columbia, usa",english383,english,female,1326,usa,district of columbia
100 | 1327,1327,77.0,0.0,"mcveigh, kentucky, usa",english385,english,female,1328,usa,kentucky
101 | 1329,1329,56.0,0.0,"los angeles, california, usa",english386,english,female,1330,usa,california
102 | 1330,1330,21.0,0.0,"lancaster, california, usa",english387,english,female,1331,usa,california
103 | 1331,1331,70.0,0.0,"bluefield, west virginia, usa",english388,english,female,1332,usa,west virginia
104 | 1332,1332,71.0,0.0,"aldie, virginia, usa",english389,english,female,1333,usa,virginia
105 | 1334,1334,84.0,0.0,"milton, florida, usa",english391,english,female,1335,usa,florida
106 | 1336,1336,58.0,0.0,"washington, district of columbia, usa",english393,english,female,1337,usa,district of columbia
107 | 1338,1338,82.0,0.0,"aiken, south carolina, usa",english394,english,female,1339,usa,south carolina
108 | 1340,1340,48.0,0.0,"orange, virginia, usa",english396,english,female,1341,usa,virginia
109 | 1341,1341,76.0,0.0,"wadesboro, north carolina, usa",english397,english,female,1342,usa,north carolina
110 | 1342,1342,83.0,0.0,"salisbury, north carolina, usa",english398,english,female,1343,usa,north carolina
111 | 1344,1344,80.0,0.0,"la grange, georgia, usa",english399,english,female,1345,usa,georgia
112 | 1357,1357,24.0,0.0,"pensacola, florida, usa",english406,english,female,1358,usa,florida
113 | 1370,1370,26.0,0.0,"washington, district of columbia, usa",english411,english,female,1371,usa,district of columbia
114 | 1371,1371,31.0,0.0,"washington, district of columbia, usa",english412,english,female,1372,usa,district of columbia
115 | 1373,1373,30.0,0.0,"woonsocket, rhode island, usa",english414,english,female,1374,usa,rhode island
116 | 1394,1394,24.0,0.0,"baltimore, maryland, usa",english423,english,female,1395,usa,maryland
117 | 1410,1410,30.0,0.0,"florence, south carolina, usa",english426,english,female,1411,usa,south carolina
118 | 1411,1411,68.0,0.0,"hanover, pennsylvania, usa",english427,english,female,1412,usa,pennsylvania
119 | 1412,1412,55.0,0.0,"kingstree, south carolina, usa",english428,english,female,1413,usa,south carolina
120 | 1415,1415,19.0,0.0,"philadelphia, pennsylvania, usa",english429,english,female,1416,usa,pennsylvania
121 | 1422,1422,23.0,0.0,"green bay, wisconsin, usa",english431,english,female,1423,usa,wisconsin
122 | 1429,1429,32.0,0.0,"gladwin, michigan, usa",english433,english,female,1430,usa,michigan
123 | 1470,1470,20.0,0.0,"palm springs, california, usa",english435,english,female,1471,usa,california
124 | 1491,1491,20.0,0.0,"houston, texas, usa",english438,english,female,1492,usa,texas
125 | 1523,1523,19.0,0.0,"oquawka, illinois, usa",english441,english,female,1524,usa,illinois
126 | 1532,1532,23.0,0.0,"moorhead, minnesota, usa",english446,english,female,1533,usa,minnesota
127 | 1547,1547,21.0,0.0,"chicago, illinois, usa",english453,english,female,1548,usa,illinois
128 | 1549,1549,19.0,0.0,"raleigh, north carolina, usa",english454,english,female,1550,usa,north carolina
129 | 1551,1551,29.0,0.0,"washington, dc, usa",english455,english,female,1552,usa,dc
130 | 1558,1558,22.0,0.0,"reno, nevada, usa",english458,english,female,1559,usa,nevada
131 | 1562,1562,44.0,0.0,"poughkeepsie, new york, usa",english461,english,female,1563,usa,new york
132 | 1570,1570,53.0,0.0,"brooklyn, new york, usa",english463,english,female,1571,usa,new york
133 | 1655,1655,33.0,0.0,"rochester, minnesota, usa",english474,english,female,1656,usa,minnesota
134 | 1656,1656,29.0,0.0,"ogden, utah, usa",english475,english,female,1657,usa,utah
135 | 1657,1657,64.0,0.0,"manchester, new hampshire, usa",english476,english,female,1658,usa,new hampshire
136 | 1658,1658,31.0,0.0,"san diego, california, usa",english477,english,female,1659,usa,california
137 | 1663,1663,64.0,0.0,"new york, new york, usa",english479,english,female,1664,usa,new york
138 | 1665,1665,88.0,0.0,"stringtown, oklahoma, usa",english481,english,female,1666,usa,oklahoma
139 | 1666,1666,74.0,0.0,"yeadon, pennsylvania, usa",english482,english,female,1667,usa,pennsylvania
140 | 1715,1715,22.0,0.0,"omaha, nebraska, usa",english492,english,female,1716,usa,nebraska
141 | 1727,1727,21.0,0.0,"st. paul, minnesota, usa",english499,english,female,1728,usa,minnesota
142 | 1733,1733,64.0,0.0,"pittsburgh, pennsylvania, usa",english501,english,female,1734,usa,pennsylvania
143 | 1734,1734,30.0,0.0,"farmington hills, michigan, usa",english502,english,female,1735,usa,michigan
144 | 1798,1798,30.0,0.0,"new orleans, louisiana, usa",english506,english,female,1799,usa,louisiana
145 | 1800,1800,20.0,0.0,"winfield, illinois, usa",english508,english,female,1801,usa,illinois
146 | 1801,1801,44.0,0.0,"coudersport, pennsylvania, usa",english509,english,female,1802,usa,pennsylvania
147 | 1802,1802,66.0,0.0,"philadelphia, pennsylvania, usa",english510,english,female,1803,usa,pennsylvania
148 | 1817,1817,31.0,0.0,"montgomery, alabama, usa",english511,english,female,1818,usa,alabama
149 | 1818,1818,54.0,0.0,"salina, kansas, usa",english512,english,female,1819,usa,kansas
150 | 1872,1872,55.0,0.0,"youngstown, ohio, usa",english515,english,female,1873,usa,ohio
151 | 1874,1874,32.0,0.0,"dripping springs, texas, usa",english516,english,female,1875,usa,texas
152 | 1883,1883,47.0,0.0,"st. paul, minnesota, usa",english520,english,female,1884,usa,minnesota
153 | 1885,1885,25.0,0.0,"bethesda, maryland, usa",english521,english,female,1886,usa,maryland
154 | 1886,1886,22.0,0.0,"bethpage, new york, usa",english522,english,female,1887,usa,new york
155 | 1887,1887,18.0,0.0,"madison, wisconsin, usa",english523,english,female,1888,usa,wisconsin
156 | 1892,1892,18.0,0.0,"albion, new york, usa",english525,english,female,1893,usa,new york
157 | 1906,1906,25.0,0.0,"wynnewood, pennsylvania, usa",english527,english,female,1907,usa,pennsylvania
158 | 1907,1907,52.0,0.0,"green bay, wisconsin, usa",english528,english,female,1908,usa,wisconsin
159 | 1924,1924,29.0,0.0,"burlington, vermont, usa",english529,english,female,1925,usa,vermont
160 | 1925,1925,50.0,0.0,"detroit, michigan, usa",english530,english,female,1926,usa,michigan
161 | 1927,1927,29.0,0.0,"nokesville, virginia, usa",english531,english,female,1928,usa,virginia
162 | 1932,1932,23.0,0.0,"bristol, connecticut, usa",english532,english,female,1933,usa,connecticut
163 | 1933,1933,38.0,0.0,"fairfax, virginia, usa",english533,english,female,1934,usa,virginia
164 | 1950,1950,29.0,0.0,"mineola, new york, usa",english534,english,female,1951,usa,new york
165 | 1954,1954,22.0,0.0,"plymouth, massachusetts, usa",english535,english,female,1955,usa,massachusetts
166 | 1955,1955,21.0,0.0,"richmond, virginia, usa",english536,english,female,1956,usa,virginia
167 | 1956,1956,21.0,0.0,"woodbridge, virginia, usa",english537,english,female,1957,usa,virginia
168 | 1957,1957,20.0,0.0,"lyndhurst, virginia, usa",english538,english,female,1958,usa,virginia
169 | 1958,1958,19.0,0.0,"orange county, california, usa",english539,english,female,1959,usa,california
170 | 1959,1959,18.0,0.0,"arlington, virginia, usa",english540,english,female,1960,usa,virginia
171 | 1960,1960,27.0,0.0,"fort lauderdale, florida, usa",english541,english,female,1961,usa,florida
172 | 1995,1995,43.0,0.0,"tangier island, virginia, usa",english546,english,female,1996,usa,virginia
173 | 2028,2028,81.0,0.0,"king george, virginia, usa",english551,english,female,2029,usa,virginia
174 | 2042,2042,23.0,0.0,"bethesda, maryland, usa",english552,english,female,2043,usa,maryland
175 | 2046,2046,22.0,0.0,"hoffman estates, illinois, usa",english554,english,female,2047,usa,illinois
176 | 2049,2049,84.0,0.0,"brooklyn, new york, usa",english556,english,female,2050,usa,new york
177 | 2055,2055,19.0,0.0,"reston, virginia, usa",english559,english,female,2056,usa,virginia
178 | 2058,2058,22.0,0.0,"hartford, connecticut, usa",english560,english,female,2059,usa,connecticut
179 | 2059,2059,20.0,0.0,"yorktown, virginia, usa",english561,english,female,2060,usa,virginia
180 | 2060,2060,30.0,0.0,"florence, south carolina, usa",english562,english,female,2061,usa,south carolina
181 | 2079,2079,27.0,0.0,"portland, oregon, usa",english565,english,female,2080,usa,oregon
182 | 2102,2102,86.0,0.0,"quincy, florida, usa",english568,english,female,2103,usa,florida
183 | 2105,2105,42.0,0.0,"parma, ohio, usa",english570,english,female,2106,usa,ohio
184 | 2166,2166,63.0,0.0,"washington, dc, usa",english576,english,female,2167,usa,dc
185 | 2169,2169,38.0,0.0,"san leandro, california, usa",english577,english,female,2170,usa,california
186 |
--------------------------------------------------------------------------------
/dataframes/df_usa_male.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country,state
2 | 60,60,42.0,0.0,"pittsburgh, pennsylvania, usa",english1,english,male,61,usa,pennsylvania
3 | 64,64,22.0,0.0,"torrington, connecticut, usa",english102,english,male,65,usa,connecticut
4 | 65,65,21.0,0.0,"staten island, new york, usa",english103,english,male,66,usa,new york
5 | 72,72,21.0,0.0,"wise, virginia, usa",english16,english,male,73,usa,virginia
6 | 73,73,79.0,0.0,"darwin va, virginia, usa",english17,english,male,74,usa,virginia
7 | 75,75,53.0,0.0,"louisville, kentucky, usa",english19,english,male,76,usa,kentucky
8 | 80,80,43.0,0.0,"englewood, tennessee, usa",english23,english,male,81,usa,tennessee
9 | 82,82,57.0,0.0,"atlanta, georgia, usa",english25,english,male,83,usa,georgia
10 | 83,83,71.0,0.0,"smith island, maryland, usa",english26,english,male,84,usa,maryland
11 | 91,91,21.0,0.0,"san diego, california, usa",english33,english,male,92,usa,california
12 | 94,94,60.0,0.0,"arcadia, wisconsin, usa",english36,english,male,95,usa,wisconsin
13 | 104,104,52.0,0.0,"pine bluff, arkansas, usa",english45,english,male,105,usa,arkansas
14 | 105,105,73.0,0.0,"creswell, north carolina, usa",english46,english,male,106,usa,north carolina
15 | 108,108,43.0,0.0,"castro valley, california, usa",english49,english,male,109,usa,california
16 | 109,109,62.0,0.0,"fairfax, virginia, usa",english5,english,male,110,usa,virginia
17 | 111,111,67.0,0.0,"detroit, michigan, usa",english51,english,male,112,usa,michigan
18 | 113,113,63.0,0.0,"syracuse, new york, usa",english53,english,male,114,usa,new york
19 | 119,119,30.0,0.0,"columbus, ohio, usa",english59,english,male,120,usa,ohio
20 | 121,121,18.0,0.0,"chesapeake, virginia, usa",english60,english,male,122,usa,virginia
21 | 123,123,30.0,0.0,"west jordan, utah, usa",english62,english,male,124,usa,utah
22 | 124,124,21.0,0.0,"lewisville, texas, usa",english63,english,male,125,usa,texas
23 | 126,126,47.0,0.0,"lewistown, pennsylvania, usa",english65,english,male,127,usa,pennsylvania
24 | 127,127,19.0,0.0,"baltimore, maryland, usa",english66,english,male,128,usa,maryland
25 | 128,128,37.0,0.0,"princeton, indiana, usa",english67,english,male,129,usa,indiana
26 | 129,129,52.0,0.0,"new york, new york, usa",english68,english,male,130,usa,new york
27 | 131,131,52.0,0.0,"macon, mississippi, usa",english7,english,male,132,usa,mississippi
28 | 132,132,21.0,0.0,"beaumont, texas, usa",english70,english,male,133,usa,texas
29 | 133,133,20.0,0.0,"west covina, california, usa",english71,english,male,134,usa,california
30 | 136,136,21.0,0.0,"wichita, kansas, usa",english74,english,male,137,usa,kansas
31 | 137,137,32.0,0.0,"idaho falls, idaho, usa",english75,english,male,138,usa,idaho
32 | 141,141,73.0,0.0,"boston, massachusetts, usa",english79,english,male,142,usa,massachusetts
33 | 144,144,30.0,0.0,"st. paul, minnesota, usa",english81,english,male,145,usa,minnesota
34 | 149,149,50.0,0.0,"charleston, south carolina, usa",english86,english,male,150,usa,south carolina
35 | 151,151,18.0,0.0,"grand rapids, michigan, usa",english88,english,male,152,usa,michigan
36 | 152,152,18.0,0.0,"kingston ma, massachusetts, usa",english89,english,male,153,usa,massachusetts
37 | 154,154,53.0,0.0,"pittsburgh, pennsylvania, usa",english90,english,male,155,usa,pennsylvania
38 | 159,159,18.0,0.0,"mishawaka, indiana, usa",english95,english,male,160,usa,indiana
39 | 160,160,31.0,0.0,"point pleasant, new jersey, usa",english96,english,male,161,usa,new jersey
40 | 161,161,42.0,0.0,"wilmington nc, north carolina, usa",english97,english,male,162,usa,north carolina
41 | 162,162,22.0,0.0,"spokane, washington, usa",english98,english,male,163,usa,washington
42 | 261,261,22.0,5.0,"berkeley, california, usa",mandarin7,mandarin,male,262,usa,california
43 | 415,415,19.0,0.0,"chicago, illinois, usa",english107,english,male,416,usa,illinois
44 | 443,443,56.0,0.0,"atlanta, georgia, usa",english116,english,male,444,usa,georgia
45 | 445,445,19.0,0.0,"akron, ohio, usa",english118,english,male,446,usa,ohio
46 | 463,463,19.0,16.0,"new orleans, louisiana, usa",kikongo1,kikongo,male,464,usa,louisiana
47 | 464,464,21.0,0.0,"waconia, minnesota, usa",english121,english,male,465,usa,minnesota
48 | 479,479,38.0,0.0,"brooklyn, new york, usa",english124,english,male,480,usa,new york
49 | 488,488,39.0,0.0,"new britain, connecticut, usa",english127,english,male,489,usa,connecticut
50 | 496,496,23.0,0.0,"west palm beach, florida, usa",english131,english,male,497,usa,florida
51 | 506,506,22.0,0.0,"orange beach, alabama, usa",english135,english,male,507,usa,alabama
52 | 508,508,26.0,0.0,"charleston, west virginia, usa",english137,english,male,509,usa,west virginia
53 | 510,510,20.0,0.0,"providence, rhode island, usa",english139,english,male,511,usa,rhode island
54 | 516,516,19.0,0.0,"avon, new york, usa",english142,english,male,517,usa,new york
55 | 517,517,42.0,0.0,"minneapolis, minnesota, usa",english143,english,male,518,usa,minnesota
56 | 521,521,22.0,0.0,"blytheville, arkansas, usa",english146,english,male,522,usa,arkansas
57 | 525,525,18.0,0.0,"lawrenceville, georgia, usa",english149,english,male,526,usa,georgia
58 | 526,526,31.0,0.0,"brownsville, kentucky, usa",english150,english,male,527,usa,kentucky
59 | 527,527,18.0,0.0,"baltimore, maryland, usa",english151,english,male,528,usa,maryland
60 | 534,534,35.0,0.0,"oakland, california, usa",english155,english,male,535,usa,california
61 | 537,537,35.0,0.0,"fort worth, texas, usa",english157,english,male,538,usa,texas
62 | 546,546,41.0,0.0,"fairview park, ohio, usa",english163,english,male,547,usa,ohio
63 | 550,550,18.0,0.0,"glenside, pennsylvania, usa",english166,english,male,551,usa,pennsylvania
64 | 551,551,43.0,0.0,"lakeview, michigan, usa",english167,english,male,552,usa,michigan
65 | 553,553,40.0,0.0,"oceanside, california, usa",english168,english,male,554,usa,california
66 | 554,554,18.0,0.0,"cleveland, mississippi, usa",english169,english,male,555,usa,mississippi
67 | 562,562,21.0,0.0,"st. louis, missouri, usa",english171,english,male,563,usa,missouri
68 | 566,566,19.0,0.0,"libertyville, illinois, usa",english173,english,male,567,usa,illinois
69 | 571,571,24.0,0.0,"seattle, washington, usa",english175,english,male,572,usa,washington
70 | 582,582,30.0,0.0,"wilkes-barre, pennsylvania, usa",english178,english,male,583,usa,pennsylvania
71 | 583,583,57.0,0.0,"huron, south dakota, usa",english179,english,male,584,usa,south dakota
72 | 584,584,20.0,0.0,"newport, rhode island, usa",english180,english,male,585,usa,rhode island
73 | 585,585,68.0,0.0,"new eagle, pennsylvania, usa",english181,english,male,586,usa,pennsylvania
74 | 589,589,50.0,0.0,"troy, new york, usa",english182,english,male,590,usa,new york
75 | 616,616,19.0,0.0,"grand forks, north dakota, usa",english189,english,male,617,usa,north dakota
76 | 617,617,27.0,0.0,"oak park, illinois, usa",english190,english,male,618,usa,illinois
77 | 630,630,19.0,0.0,"las cruces, new mexico, usa",english191,english,male,631,usa,new mexico
78 | 636,636,32.0,0.0,"honolulu, hawaii, usa",english193,english,male,637,usa,hawaii
79 | 661,661,22.0,0.0,"iowa city, iowa, usa",english197,english,male,662,usa,iowa
80 | 662,662,56.0,0.0,"washington, district of columbia, usa",english198,english,male,663,usa,district of columbia
81 | 663,663,25.0,0.0,"bay shore, new york, usa",english199,english,male,664,usa,new york
82 | 672,672,19.0,0.0,"crisfield, maryland, usa",english202,english,male,673,usa,maryland
83 | 677,677,53.0,0.0,"chicago, illinois, usa",english204,english,male,678,usa,illinois
84 | 694,694,46.0,0.0,"washington, dc, usa",english208,english,male,695,usa,dc
85 | 719,719,30.0,0.0,"worcester, massachusetts, usa",english211,english,male,720,usa,massachusetts
86 | 728,728,24.0,0.0,"spartanburg, south carolina, usa",english212,english,male,729,usa,south carolina
87 | 733,733,38.0,0.0,"dodge city, kansas, usa",english213,english,male,734,usa,kansas
88 | 736,736,26.0,0.0,"myrtle beach, south carolina, usa",english214,english,male,737,usa,south carolina
89 | 740,740,22.0,6.0,"washington, dc, usa",french18,french,male,741,usa,dc
90 | 766,766,18.0,0.0,"erie, pennsylvania, usa",english223,english,male,767,usa,pennsylvania
91 | 774,774,25.0,0.0,"burlington, vermont, usa",english229,english,male,775,usa,vermont
92 | 790,790,22.0,0.0,"tampa, florida, usa",english231,english,male,791,usa,florida
93 | 793,793,49.0,0.0,"bloomington, indiana, usa",english234,english,male,794,usa,indiana
94 | 798,798,44.0,0.0,"st. charles, illinois, usa",english236,english,male,799,usa,illinois
95 | 820,820,23.0,0.0,"san francisco, california, usa",english244,english,male,821,usa,california
96 | 823,823,25.0,0.0,"pittsburgh, pennsylvania, usa",english245,english,male,824,usa,pennsylvania
97 | 824,824,32.0,0.0,"los angeles, california, usa",english246,english,male,825,usa,california
98 | 854,854,21.0,0.0,"mt. kisco, new york, usa",english251,english,male,855,usa,new york
99 | 857,857,39.0,0.0,"eugene, oregon, usa",english254,english,male,858,usa,oregon
100 | 860,860,23.0,0.0,"laurinburg, north carolina, usa",english256,english,male,861,usa,north carolina
101 | 862,862,24.0,0.0,"san diego, california, usa",english257,english,male,863,usa,california
102 | 875,875,60.0,0.0,"naylor, maryland, usa",english262,english,male,876,usa,maryland
103 | 883,883,21.0,0.0,"oak forest, illinois, usa",english263,english,male,884,usa,illinois
104 | 888,888,21.0,0.0,"freemont, california, usa",english264,english,male,889,usa,california
105 | 889,889,31.0,0.0,"vancouver wa, washington, usa",english265,english,male,890,usa,washington
106 | 898,898,18.0,0.0,"san diego, california, usa",english266,english,male,899,usa,california
107 | 904,904,20.0,0.0,"dallas, texas, usa",english272,english,male,905,usa,texas
108 | 906,906,46.0,0.0,"blue bell, pennsylvania, usa",english273,english,male,907,usa,pennsylvania
109 | 936,936,23.0,0.0,"ramsey mn, minnesota, usa",english281,english,male,937,usa,minnesota
110 | 938,938,20.0,0.0,"tampa, florida, usa",english282,english,male,939,usa,florida
111 | 950,950,18.0,0.0,"pelham, new york, usa",english283,english,male,951,usa,new york
112 | 951,951,33.0,0.0,"gainesville, florida, usa",english284,english,male,952,usa,florida
113 | 985,985,20.0,0.0,"duluth, georgia, usa",english290,english,male,986,usa,georgia
114 | 1050,1050,38.0,0.0,"kansas city, missouri, usa",english292,english,male,1051,usa,missouri
115 | 1070,1070,19.0,0.0,"lindenhurst, new york, usa",english297,english,male,1071,usa,new york
116 | 1099,1099,48.0,0.0,"alexandria, va, usa",english307,english,male,1100,usa,va
117 | 1118,1118,28.0,0.0,"silver spring, maryland, usa",english313,english,male,1119,usa,maryland
118 | 1120,1120,25.0,0.0,"los angeles, california, usa",english315,english,male,1121,usa,california
119 | 1121,1121,33.0,0.0,"sacramento, california, usa",english316,english,male,1122,usa,california
120 | 1131,1131,26.0,0.0,"burlington, vermont, usa",english317,english,male,1132,usa,vermont
121 | 1160,1160,52.0,5.0,"brooklyn, new york, usa",yiddish3,yiddish,male,1161,usa,new york
122 | 1162,1162,52.0,0.0,"brooklyn, new york, usa",english321,english,male,1163,usa,new york
123 | 1175,1175,32.0,0.0,"reading, pennsylvania, usa",english325,english,male,1176,usa,pennsylvania
124 | 1187,1187,53.0,0.0,"fairborn, ohio, usa",english326,english,male,1188,usa,ohio
125 | 1204,1204,27.0,0.0,"dunedin, florida, usa",english327,english,male,1205,usa,florida
126 | 1215,1215,6.0,0.0,"washington, district of columbia, usa",english335,english,male,1216,usa,district of columbia
127 | 1219,1219,54.0,0.0,"chicago, illinois, usa",english339,english,male,1220,usa,illinois
128 | 1221,1221,19.0,0.0,"charleston, west virginia, usa",english340,english,male,1222,usa,west virginia
129 | 1224,1224,33.0,0.0,"kansas city, missouri, usa",english342,english,male,1225,usa,missouri
130 | 1225,1225,23.0,0.0,"portland, maine, usa",english343,english,male,1226,usa,maine
131 | 1232,1232,20.0,0.0,"kansas city, missouri, usa",english345,english,male,1233,usa,missouri
132 | 1234,1234,60.0,0.0,"east hartford, connecticut, usa",english346,english,male,1235,usa,connecticut
133 | 1241,1241,37.0,0.0,"carthage, missouri, usa",english351,english,male,1242,usa,missouri
134 | 1255,1255,19.0,0.0,"los angeles, california, usa",english355,english,male,1256,usa,california
135 | 1257,1257,24.0,0.0,"alto, georgia, usa",english357,english,male,1258,usa,georgia
136 | 1296,1296,27.0,0.0,"mcminnville, oregon, usa",english369,english,male,1297,usa,oregon
137 | 1306,1306,80.0,0.0,"christiansburg, virginia, usa",english372,english,male,1307,usa,virginia
138 | 1307,1307,22.0,0.0,"russellville, kentucky, usa",english373,english,male,1308,usa,kentucky
139 | 1311,1311,28.0,0.0,"manchester, connecticut, usa",english375,english,male,1312,usa,connecticut
140 | 1314,1314,46.0,0.0,"pasadena, california, usa",english376,english,male,1315,usa,california
141 | 1320,1320,43.0,0.0,"summit, new jersey, usa",english379,english,male,1321,usa,new jersey
142 | 1323,1323,85.0,0.0,"pike county, kentucky, usa",english381,english,male,1324,usa,kentucky
143 | 1324,1324,34.0,0.0,"arlington, virginia, usa",english382,english,male,1325,usa,virginia
144 | 1326,1326,57.0,0.0,"williamson, west virginia, usa",english384,english,male,1327,usa,west virginia
145 | 1333,1333,74.0,0.0,"milwaukee, wisconsin, usa",english390,english,male,1334,usa,wisconsin
146 | 1335,1335,62.0,0.0,"warrenton, virginia, usa",english392,english,male,1336,usa,virginia
147 | 1339,1339,39.0,0.0,"alexandria, virginia, usa",english395,english,male,1340,usa,virginia
148 | 1360,1360,64.0,0.0,"lynwood, california, usa",english408,english,male,1361,usa,california
149 | 1374,1374,32.0,0.0,"woonsocket, rhode island, usa",english415,english,male,1375,usa,rhode island
150 | 1384,1384,27.0,0.0,"youngstown, ohio, usa",english419,english,male,1385,usa,ohio
151 | 1389,1389,31.0,0.0,"rochester, new york, usa",english422,english,male,1390,usa,new york
152 | 1408,1408,75.0,0.0,"danville, virginia, usa",english424,english,male,1409,usa,virginia
153 | 1409,1409,31.0,0.0,"danville, virginia, usa",english425,english,male,1410,usa,virginia
154 | 1468,1468,24.0,0.0,"kansas city, kansas, usa",english434,english,male,1469,usa,kansas
155 | 1476,1476,21.0,0.0,"cleveland, ohio, usa",english437,english,male,1477,usa,ohio
156 | 1527,1527,23.0,0.0,"salisbury mills, new york, usa",english442,english,male,1528,usa,new york
157 | 1529,1529,31.0,0.0,"fort collins, colorado, usa",english443,english,male,1530,usa,colorado
158 | 1530,1530,24.0,0.0,"anniston, alabama, usa",english444,english,male,1531,usa,alabama
159 | 1531,1531,60.0,0.0,"san francisco, california, usa",english445,english,male,1532,usa,california
160 | 1533,1533,21.0,0.0,"hartford, connecticut, usa",english447,english,male,1534,usa,connecticut
161 | 1537,1537,27.0,0.0,"barton, vermont, usa",english449,english,male,1538,usa,vermont
162 | 1545,1545,44.0,0.0,"hazlehurst, georgia, usa",english451,english,male,1546,usa,georgia
163 | 1554,1554,43.0,0.0,"syracuse, new york, usa",english457,english,male,1555,usa,new york
164 | 1559,1559,19.0,0.0,"charlotte, north carolina, usa",english459,english,male,1560,usa,north carolina
165 | 1563,1563,20.0,0.0,"paducah, kentucky, usa",english462,english,male,1564,usa,kentucky
166 | 1636,1636,18.0,0.0,"portland, maine, usa",english466,english,male,1637,usa,maine
167 | 1639,1639,30.0,0.0,"myrtle beach, south carolina, usa",english468,english,male,1640,usa,south carolina
168 | 1640,1640,22.0,0.0,"knoxville, tennessee, usa",english469,english,male,1641,usa,tennessee
169 | 1664,1664,58.0,0.0,"winston-salem, north carolina, usa",english480,english,male,1665,usa,north carolina
170 | 1667,1667,19.0,0.0,"boston, massachusetts, usa",english483,english,male,1668,usa,massachusetts
171 | 1673,1673,20.0,0.0,"cromwell, connecticut, usa",english485,english,male,1674,usa,connecticut
172 | 1674,1674,73.0,5.0,"pepeekeo, hawai'i, usa",hawai'i,hawai'i,male,1675,usa,hawai'i
173 | 1711,1711,24.0,4.0,"bethlehem, pennsylvania, usa",greek12,greek,male,1712,usa,pennsylvania
174 | 1717,1717,20.0,0.0,"miami, florida, usa",english493,english,male,1718,usa,florida
175 | 1718,1718,27.5,0.0,"findlay, ohio, usa",english494,english,male,1719,usa,ohio
176 | 1719,1719,21.0,0.0,"kirkland, washington, usa",english495,english,male,1720,usa,washington
177 | 1725,1725,22.0,0.0,"cleveland, ohio, usa",english497,english,male,1726,usa,ohio
178 | 1785,1785,25.0,0.0,"west palm beach, florida, usa",english504,english,male,1786,usa,florida
179 | 1871,1871,57.0,0.0,"youngstown, ohio, usa",english514,english,male,1872,usa,ohio
180 | 1875,1875,33.0,0.0,"colorado springs, colorado, usa",english517,english,male,1876,usa,colorado
181 | 1889,1889,19.0,0.0,"downers grove, illinois, usa",english524,english,male,1890,usa,illinois
182 | 1894,1894,20.0,0.0,"woodbridge, virginia, usa",english526,english,male,1895,usa,virginia
183 | 1949,1949,26.0,3.0,"bayside, new york, usa",korean42,korean,male,1950,usa,new york
184 | 1969,1969,31.0,0.0,"hanover, new hampshire, usa",english544,english,male,1970,usa,new hampshire
185 | 1994,1994,56.0,0.0,"tangier island, virginia, usa",english545,english,male,1995,usa,virginia
186 | 1996,1996,63.0,0.0,"tangier island, virginia, usa",english547,english,male,1997,usa,virginia
187 | 1999,1999,23.0,7.0,"superior, colorado, usa",mandarin58,mandarin,male,2000,usa,colorado
188 | 2000,2000,19.0,5.0,"woodbridge, virginia, usa",twi5,twi,male,2001,usa,virginia
189 | 2027,2027,82.0,0.0,"fredericksburg, virginia, usa",english550,english,male,2028,usa,virginia
190 | 2045,2045,21.0,0.0,"nashville, tennessee, usa",english553,english,male,2046,usa,tennessee
191 | 2048,2048,90.0,0.0,"brooklyn, new york, usa",english555,english,male,2049,usa,new york
192 | 2054,2054,23.0,0.0,"queens, new york, usa",english558,english,male,2055,usa,new york
193 | 2073,2073,32.0,0.0,"manassas, virginia, usa",english563,english,male,2074,usa,virginia
194 | 2076,2076,52.0,0.0,"casper, wyoming, usa",english564,english,male,2077,usa,wyoming
195 | 2100,2100,24.0,0.0,"new york, new york, usa",english567,english,male,2101,usa,new york
196 | 2112,2112,39.0,0.0,"washington, dc, usa",english571,english,male,2113,usa,dc
197 | 2120,2120,51.0,0.0,"fort worth, texas, usa",english572,english,male,2121,usa,texas
198 | 2123,2123,46.0,0.0,"painesville, ohio, usa",english573,english,male,2124,usa,ohio
199 | 2145,2145,21.0,6.0,"alexandria, virginia, usa",tagalog17,tagalog,male,2146,usa,virginia
200 | 2164,2164,24.0,0.0,"great falls, virginia, usa",english575,english,male,2165,usa,virginia
201 |
--------------------------------------------------------------------------------
/dataframes/df_usa_female.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country,state
2 | 61,61,35.0,0.0,"davenport, iowa, usa",english10,english,female,62,usa,iowa
3 | 62,62,23.0,0.0,"miami, florida, usa",english100,english,female,63,usa,florida
4 | 66,66,18.0,0.0,"youngstown, ohio, usa",english104,english,female,67,usa,ohio
5 | 71,71,7.0,0.0,"norton, virginia, usa",english15,english,female,72,usa,virginia
6 | 74,74,39.0,0.0,"dickenson county, virginia, usa",english18,english,female,75,usa,virginia
7 | 77,77,27.0,0.0,"mt. holly, north carolina, usa",english20,english,female,78,usa,north carolina
8 | 78,78,37.0,0.0,"boston, massachusetts, usa",english21,english,female,79,usa,massachusetts
9 | 84,84,37.0,0.0,"smith island, maryland, usa",english27,english,female,85,usa,maryland
10 | 90,90,50.0,0.0,"fresno, california, usa",english32,english,female,91,usa,california
11 | 92,92,18.0,0.0,"ronkonkoma, new york, usa",english34,english,female,93,usa,new york
12 | 93,93,60.0,0.0,"milwaukee, wisconsin, usa",english35,english,female,94,usa,wisconsin
13 | 97,97,59.0,0.0,"new orleans, louisiana, usa",english39,english,female,98,usa,louisiana
14 | 103,103,63.0,0.0,"winston salem, north carolina, usa",english44,english,female,104,usa,north carolina
15 | 106,106,76.0,0.0,"plantersville, arkansas, usa",english47,english,female,107,usa,arkansas
16 | 110,110,29.0,0.0,"baltic, south dakota, usa",english50,english,female,111,usa,south dakota
17 | 114,114,40.0,0.0,"new albany, indiana, usa",english54,english,female,115,usa,indiana
18 | 115,115,53.0,0.0,"st. louis, missouri, usa",english55,english,female,116,usa,missouri
19 | 120,120,45.0,0.0,"brooklyn, new york, usa",english6,english,female,121,usa,new york
20 | 138,138,18.0,0.0,"boston, massachusetts, usa",english76,english,female,139,usa,massachusetts
21 | 140,140,20.0,0.0,"caribou, maine, usa",english78,english,female,141,usa,maine
22 | 145,145,50.0,0.0,"chicago, illinois, usa",english82,english,female,146,usa,illinois
23 | 146,146,42.0,0.0,"winnfield, louisiana, usa",english83,english,female,147,usa,louisiana
24 | 153,153,48.0,0.0,"carthage, texas, usa",english9,english,female,154,usa,texas
25 | 156,156,22.0,0.0,"wisconsin rapids, wisconsin, usa",english92,english,female,157,usa,wisconsin
26 | 157,157,22.0,0.0,"mamou, louisiana, usa",english93,english,female,158,usa,louisiana
27 | 158,158,55.0,0.0,"pensacola, florida, usa",english94,english,female,159,usa,florida
28 | 163,163,52.0,0.0,"pittsburgh, pennsylvania, usa",english99,english,female,164,usa,pennsylvania
29 | 407,407,39.0,0.0,"los angeles, california, usa",english106,english,female,408,usa,california
30 | 419,419,26.0,0.0,"concord, new hampshire, usa",english109,english,female,420,usa,new hampshire
31 | 441,441,38.0,0.0,"birmingham 2, alabama, usa",english114,english,female,442,usa,alabama
32 | 444,444,60.0,0.0,"elmore, alabama, usa",english117,english,female,445,usa,alabama
33 | 468,468,28.0,0.0,"riverside, california, usa",english123,english,female,469,usa,california
34 | 486,486,34.0,0.0,"cincinnati, ohio, usa",english126,english,female,487,usa,ohio
35 | 489,489,20.0,0.0,"miami, florida, usa",english128,english,female,490,usa,florida
36 | 503,503,36.0,0.0,"norwich, new york, usa",english133,english,female,504,usa,new york
37 | 509,509,25.0,0.0,"palmer, alaska, usa",english138,english,female,510,usa,alaska
38 | 522,522,40.0,0.0,"merced, california, usa",english147,english,female,523,usa,california
39 | 539,539,18.0,0.0,"washington, district of columbia, usa",english158,english,female,540,usa,district of columbia
40 | 541,541,21.0,0.0,"redwood falls, minnesota, usa",english160,english,female,542,usa,minnesota
41 | 545,545,75.0,0.0,"wakefield, ohio, usa",english161,english,female,546,usa,ohio
42 | 547,547,41.0,0.0,"delaware, ohio, usa",english162,english,female,548,usa,ohio
43 | 549,549,43.0,0.0,"detroit, michigan, usa",english165,english,female,550,usa,michigan
44 | 555,555,50.0,0.0,"belmont, mississippi, usa",english170,english,female,556,usa,mississippi
45 | 570,570,31.0,1.0,"bethel, alaska, usa",yupik1,yupik,female,571,usa,alaska
46 | 572,572,18.0,0.0,"elizabeth city, north carolina, usa",english176,english,female,573,usa,north carolina
47 | 573,573,23.0,0.0,"new orleans, louisiana, usa",english177,english,female,574,usa,louisiana
48 | 596,596,18.0,0.0,"hillsboro, oregon, usa",english184,english,female,597,usa,oregon
49 | 604,604,39.0,0.0,"gadsden, alabama, usa",english186,english,female,605,usa,alabama
50 | 605,605,42.0,0.0,"algona, iowa, usa",english187,english,female,606,usa,iowa
51 | 635,635,27.0,0.0,"east lansing, michigan, usa",english192,english,female,636,usa,michigan
52 | 638,638,21.0,0.0,"delavan, wisconsin, usa",english195,english,female,639,usa,wisconsin
53 | 666,666,22.0,0.0,"berkeley, california, usa",english200,english,female,667,usa,california
54 | 667,667,26.0,0.0,"warren, michigan, usa",english201,english,female,668,usa,michigan
55 | 673,673,63.0,0.0,"boston, massachusetts, usa",english203,english,female,674,usa,massachusetts
56 | 678,678,45.0,0.0,"blue earth, minnesota, usa",english205,english,female,679,usa,minnesota
57 | 683,683,23.0,0.0,"wilkes-barre, pennsylvania, usa",english207,english,female,684,usa,pennsylvania
58 | 738,738,48.0,0.0,"chicago, illinois, usa",english216,english,female,739,usa,illinois
59 | 746,746,27.0,0.0,"abingdon, virginia, usa",english218,english,female,747,usa,virginia
60 | 747,747,38.0,0.0,"oakland, california, usa",english219,english,female,748,usa,california
61 | 748,748,18.0,0.0,"anaheim, california, usa",english220,english,female,749,usa,california
62 | 763,763,18.0,0.0,"auburn, indiana, usa",english222,english,female,764,usa,indiana
63 | 794,794,52.0,0.0,"richmond, virginia, usa",english235,english,female,795,usa,virginia
64 | 797,797,32.0,0.0,"metairie, louisiana, usa",english232,english,female,798,usa,louisiana
65 | 800,800,22.0,0.0,"jeffersonville, ohio, usa",english237,english,female,801,usa,ohio
66 | 817,817,77.0,0.0,"laurel, mississippi, usa",english242,english,female,818,usa,mississippi
67 | 839,839,18.0,0.0,"philadelphia, pennsylvania, usa",english248,english,female,840,usa,pennsylvania
68 | 849,849,29.0,0.0,"boston, massachusetts, usa",english249,english,female,850,usa,massachusetts
69 | 855,855,25.0,0.0,"san jose ca, california, usa",english252,english,female,856,usa,california
70 | 870,870,18.0,0.0,"phoenix, arizona, usa",english260,english,female,871,usa,arizona
71 | 874,874,59.0,0.0,"forestville, maryland, usa",english261,english,female,875,usa,maryland
72 | 911,911,20.0,0.0,"clifton, new jersey, usa",english275,english,female,912,usa,new jersey
73 | 917,917,50.0,0.0,"chattanooga, tennessee, usa",english276,english,female,918,usa,tennessee
74 | 918,918,48.0,0.0,"hudson, new york, usa",english277,english,female,919,usa,new york
75 | 920,920,18.0,0.0,"augusta, georgia, usa",english278,english,female,921,usa,georgia
76 | 976,976,21.0,0.0,"beaumont, texas, usa",english286,english,female,977,usa,texas
77 | 981,981,26.0,0.0,"st. louis, missouri, usa",english288,english,female,982,usa,missouri
78 | 982,982,19.0,0.0,"fairfax, virginia, usa",english289,english,female,983,usa,virginia
79 | 1051,1051,22.0,0.0,"billings, montana, usa",english293,english,female,1052,usa,montana
80 | 1085,1085,45.0,0.0,"anaheim, california, usa",english303,english,female,1086,usa,california
81 | 1109,1109,38.0,0.0,"detroit, michigan, usa",english311,english,female,1110,usa,michigan
82 | 1119,1119,26.0,0.0,"memphis, tennessee, usa",english314,english,female,1120,usa,tennessee
83 | 1205,1205,23.0,0.0,"hollywood, florida, usa",english328,english,female,1206,usa,florida
84 | 1206,1206,21.0,0.0,"boise, idaho, usa",english329,english,female,1207,usa,idaho
85 | 1207,1207,18.0,0.0,"augusta, georgia, usa",english330,english,female,1208,usa,georgia
86 | 1211,1211,20.0,0.0,"baltimore, maryland, usa",english332,english,female,1212,usa,maryland
87 | 1213,1213,32.0,0.0,"spokane, washington, usa",english333,english,female,1214,usa,washington
88 | 1216,1216,46.0,0.0,"baltimore, maryland, usa",english336,english,female,1217,usa,maryland
89 | 1217,1217,84.0,0.0,"jersey city, new jersey, usa",english337,english,female,1218,usa,new jersey
90 | 1223,1223,23.0,0.0,"new york, new york, usa",english341,english,female,1224,usa,new york
91 | 1231,1231,19.0,0.0,"lumberton, north carolina, usa",english344,english,female,1232,usa,north carolina
92 | 1235,1235,22.0,0.0,"burnsville, minnesota, usa",english347,english,female,1236,usa,minnesota
93 | 1237,1237,20.0,5.0,"miami, florida, usa",spanish72,spanish,female,1238,usa,florida
94 | 1238,1238,24.0,0.0,"warrenton, virginia, usa",english349,english,female,1239,usa,virginia
95 | 1276,1276,19.0,0.0,"erie, pennsylvania, usa",english360,english,female,1277,usa,pennsylvania
96 | 1279,1279,20.0,0.0,"burnsville, minnesota, usa",english361,english,female,1280,usa,minnesota
97 | 1302,1302,27.0,0.0,"colorado springs, colorado, usa",english371,english,female,1303,usa,colorado
98 | 1318,1318,32.0,0.0,"trenton, michigan, usa",english377,english,female,1319,usa,michigan
99 | 1319,1319,38.0,0.0,"silver spring, maryland, usa",english378,english,female,1320,usa,maryland
100 | 1322,1322,34.0,0.0,"roanoke, virginia, usa",english380,english,female,1323,usa,virginia
101 | 1325,1325,32.0,0.0,"washington, district of columbia, usa",english383,english,female,1326,usa,district of columbia
102 | 1327,1327,77.0,0.0,"mcveigh, kentucky, usa",english385,english,female,1328,usa,kentucky
103 | 1329,1329,56.0,0.0,"los angeles, california, usa",english386,english,female,1330,usa,california
104 | 1330,1330,21.0,0.0,"lancaster, california, usa",english387,english,female,1331,usa,california
105 | 1331,1331,70.0,0.0,"bluefield, west virginia, usa",english388,english,female,1332,usa,west virginia
106 | 1332,1332,71.0,0.0,"aldie, virginia, usa",english389,english,female,1333,usa,virginia
107 | 1334,1334,84.0,0.0,"milton, florida, usa",english391,english,female,1335,usa,florida
108 | 1336,1336,58.0,0.0,"washington, district of columbia, usa",english393,english,female,1337,usa,district of columbia
109 | 1338,1338,82.0,0.0,"aiken, south carolina, usa",english394,english,female,1339,usa,south carolina
110 | 1340,1340,48.0,0.0,"orange, virginia, usa",english396,english,female,1341,usa,virginia
111 | 1341,1341,76.0,0.0,"wadesboro, north carolina, usa",english397,english,female,1342,usa,north carolina
112 | 1342,1342,83.0,0.0,"salisbury, north carolina, usa",english398,english,female,1343,usa,north carolina
113 | 1344,1344,80.0,0.0,"la grange, georgia, usa",english399,english,female,1345,usa,georgia
114 | 1357,1357,24.0,0.0,"pensacola, florida, usa",english406,english,female,1358,usa,florida
115 | 1370,1370,26.0,0.0,"washington, district of columbia, usa",english411,english,female,1371,usa,district of columbia
116 | 1371,1371,31.0,0.0,"washington, district of columbia, usa",english412,english,female,1372,usa,district of columbia
117 | 1373,1373,30.0,0.0,"woonsocket, rhode island, usa",english414,english,female,1374,usa,rhode island
118 | 1393,1393,19.0,9.0,"shady grove, maryland, usa",arabic38,arabic,female,1394,usa,maryland
119 | 1394,1394,24.0,0.0,"baltimore, maryland, usa",english423,english,female,1395,usa,maryland
120 | 1410,1410,30.0,0.0,"florence, south carolina, usa",english426,english,female,1411,usa,south carolina
121 | 1411,1411,68.0,0.0,"hanover, pennsylvania, usa",english427,english,female,1412,usa,pennsylvania
122 | 1412,1412,55.0,0.0,"kingstree, south carolina, usa",english428,english,female,1413,usa,south carolina
123 | 1415,1415,19.0,0.0,"philadelphia, pennsylvania, usa",english429,english,female,1416,usa,pennsylvania
124 | 1422,1422,23.0,0.0,"green bay, wisconsin, usa",english431,english,female,1423,usa,wisconsin
125 | 1429,1429,32.0,0.0,"gladwin, michigan, usa",english433,english,female,1430,usa,michigan
126 | 1455,1455,32.0,12.0,"fort collins, colorado, usa",arabic45,arabic,female,1456,usa,colorado
127 | 1470,1470,20.0,0.0,"palm springs, california, usa",english435,english,female,1471,usa,california
128 | 1491,1491,20.0,0.0,"houston, texas, usa",english438,english,female,1492,usa,texas
129 | 1523,1523,19.0,0.0,"oquawka, illinois, usa",english441,english,female,1524,usa,illinois
130 | 1532,1532,23.0,0.0,"moorhead, minnesota, usa",english446,english,female,1533,usa,minnesota
131 | 1538,1538,20.0,5.0,"alexandria, virginia, usa",urdu9,urdu,female,1539,usa,virginia
132 | 1547,1547,21.0,0.0,"chicago, illinois, usa",english453,english,female,1548,usa,illinois
133 | 1549,1549,19.0,0.0,"raleigh, north carolina, usa",english454,english,female,1550,usa,north carolina
134 | 1551,1551,29.0,0.0,"washington, dc, usa",english455,english,female,1552,usa,dc
135 | 1558,1558,22.0,0.0,"reno, nevada, usa",english458,english,female,1559,usa,nevada
136 | 1562,1562,44.0,0.0,"poughkeepsie, new york, usa",english461,english,female,1563,usa,new york
137 | 1570,1570,53.0,0.0,"brooklyn, new york, usa",english463,english,female,1571,usa,new york
138 | 1571,1571,52.0,6.0,"bethesda, maryland, usa",farsi14,farsi,female,1572,usa,maryland
139 | 1655,1655,33.0,0.0,"rochester, minnesota, usa",english474,english,female,1656,usa,minnesota
140 | 1656,1656,29.0,0.0,"ogden, utah, usa",english475,english,female,1657,usa,utah
141 | 1657,1657,64.0,0.0,"manchester, new hampshire, usa",english476,english,female,1658,usa,new hampshire
142 | 1658,1658,31.0,0.0,"san diego, california, usa",english477,english,female,1659,usa,california
143 | 1663,1663,64.0,0.0,"new york, new york, usa",english479,english,female,1664,usa,new york
144 | 1665,1665,88.0,0.0,"stringtown, oklahoma, usa",english481,english,female,1666,usa,oklahoma
145 | 1666,1666,74.0,0.0,"yeadon, pennsylvania, usa",english482,english,female,1667,usa,pennsylvania
146 | 1675,1675,69.0,4.0,"pa'ia, hawai'i, usa",hawai'i,hawai'i,female,1676,usa,hawai'i
147 | 1715,1715,22.0,0.0,"omaha, nebraska, usa",english492,english,female,1716,usa,nebraska
148 | 1727,1727,21.0,0.0,"st. paul, minnesota, usa",english499,english,female,1728,usa,minnesota
149 | 1733,1733,64.0,0.0,"pittsburgh, pennsylvania, usa",english501,english,female,1734,usa,pennsylvania
150 | 1734,1734,30.0,0.0,"farmington hills, michigan, usa",english502,english,female,1735,usa,michigan
151 | 1749,1749,25.0,5.0,"brooklyn, new york, usa",russian39,russian,female,1750,usa,new york
152 | 1798,1798,30.0,0.0,"new orleans, louisiana, usa",english506,english,female,1799,usa,louisiana
153 | 1800,1800,20.0,0.0,"winfield, illinois, usa",english508,english,female,1801,usa,illinois
154 | 1801,1801,44.0,0.0,"coudersport, pennsylvania, usa",english509,english,female,1802,usa,pennsylvania
155 | 1802,1802,66.0,0.0,"philadelphia, pennsylvania, usa",english510,english,female,1803,usa,pennsylvania
156 | 1815,1815,22.0,3.0,"lancaster, pennsylvania, usa",greek14,greek,female,1816,usa,pennsylvania
157 | 1817,1817,31.0,0.0,"montgomery, alabama, usa",english511,english,female,1818,usa,alabama
158 | 1818,1818,54.0,0.0,"salina, kansas, usa",english512,english,female,1819,usa,kansas
159 | 1872,1872,55.0,0.0,"youngstown, ohio, usa",english515,english,female,1873,usa,ohio
160 | 1874,1874,32.0,0.0,"dripping springs, texas, usa",english516,english,female,1875,usa,texas
161 | 1883,1883,47.0,0.0,"st. paul, minnesota, usa",english520,english,female,1884,usa,minnesota
162 | 1885,1885,25.0,0.0,"bethesda, maryland, usa",english521,english,female,1886,usa,maryland
163 | 1886,1886,22.0,0.0,"bethpage, new york, usa",english522,english,female,1887,usa,new york
164 | 1887,1887,18.0,0.0,"madison, wisconsin, usa",english523,english,female,1888,usa,wisconsin
165 | 1892,1892,18.0,0.0,"albion, new york, usa",english525,english,female,1893,usa,new york
166 | 1896,1896,19.0,3.0,"washington, district of columbia, usa",arabic78,arabic,female,1897,usa,district of columbia
167 | 1906,1906,25.0,0.0,"wynnewood, pennsylvania, usa",english527,english,female,1907,usa,pennsylvania
168 | 1907,1907,52.0,0.0,"green bay, wisconsin, usa",english528,english,female,1908,usa,wisconsin
169 | 1924,1924,29.0,0.0,"burlington, vermont, usa",english529,english,female,1925,usa,vermont
170 | 1925,1925,50.0,0.0,"detroit, michigan, usa",english530,english,female,1926,usa,michigan
171 | 1927,1927,29.0,0.0,"nokesville, virginia, usa",english531,english,female,1928,usa,virginia
172 | 1932,1932,23.0,0.0,"bristol, connecticut, usa",english532,english,female,1933,usa,connecticut
173 | 1933,1933,38.0,0.0,"fairfax, virginia, usa",english533,english,female,1934,usa,virginia
174 | 1950,1950,29.0,0.0,"mineola, new york, usa",english534,english,female,1951,usa,new york
175 | 1954,1954,22.0,0.0,"plymouth, massachusetts, usa",english535,english,female,1955,usa,massachusetts
176 | 1955,1955,21.0,0.0,"richmond, virginia, usa",english536,english,female,1956,usa,virginia
177 | 1956,1956,21.0,0.0,"woodbridge, virginia, usa",english537,english,female,1957,usa,virginia
178 | 1957,1957,20.0,0.0,"lyndhurst, virginia, usa",english538,english,female,1958,usa,virginia
179 | 1958,1958,19.0,0.0,"orange county, california, usa",english539,english,female,1959,usa,california
180 | 1959,1959,18.0,0.0,"arlington, virginia, usa",english540,english,female,1960,usa,virginia
181 | 1960,1960,27.0,0.0,"fort lauderdale, florida, usa",english541,english,female,1961,usa,florida
182 | 1995,1995,43.0,0.0,"tangier island, virginia, usa",english546,english,female,1996,usa,virginia
183 | 2028,2028,81.0,0.0,"king george, virginia, usa",english551,english,female,2029,usa,virginia
184 | 2042,2042,23.0,0.0,"bethesda, maryland, usa",english552,english,female,2043,usa,maryland
185 | 2046,2046,22.0,0.0,"hoffman estates, illinois, usa",english554,english,female,2047,usa,illinois
186 | 2049,2049,84.0,0.0,"brooklyn, new york, usa",english556,english,female,2050,usa,new york
187 | 2055,2055,19.0,0.0,"reston, virginia, usa",english559,english,female,2056,usa,virginia
188 | 2058,2058,22.0,0.0,"hartford, connecticut, usa",english560,english,female,2059,usa,connecticut
189 | 2059,2059,20.0,0.0,"yorktown, virginia, usa",english561,english,female,2060,usa,virginia
190 | 2060,2060,30.0,0.0,"florence, south carolina, usa",english562,english,female,2061,usa,south carolina
191 | 2079,2079,27.0,0.0,"portland, oregon, usa",english565,english,female,2080,usa,oregon
192 | 2102,2102,86.0,0.0,"quincy, florida, usa",english568,english,female,2103,usa,florida
193 | 2105,2105,42.0,0.0,"parma, ohio, usa",english570,english,female,2106,usa,ohio
194 | 2166,2166,63.0,0.0,"washington, dc, usa",english576,english,female,2167,usa,dc
195 | 2169,2169,38.0,0.0,"san leandro, california, usa",english577,english,female,2170,usa,california
196 |
--------------------------------------------------------------------------------
/dataframes/df_english_female.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country
2 | 61,61,35.0,0.0,"davenport, iowa, usa",english10,english,female,62,usa
3 | 62,62,23.0,0.0,"miami, florida, usa",english100,english,female,63,usa
4 | 63,63,18.0,0.0,"toronto, ontario, canada",english101,english,female,64,canada
5 | 66,66,18.0,0.0,"youngstown, ohio, usa",english104,english,female,67,usa
6 | 67,67,35.0,0.0,"staffordshire, uk",english11,english,female,68,uk
7 | 68,68,71.0,0.0,"leicester, uk",english12,english,female,69,uk
8 | 71,71,7.0,0.0,"norton, virginia, usa",english15,english,female,72,usa
9 | 74,74,39.0,0.0,"dickenson county, virginia, usa",english18,english,female,75,usa
10 | 76,76,30.0,0.0,"birmingham, uk",english2,english,female,77,uk
11 | 77,77,27.0,0.0,"mt. holly, north carolina, usa",english20,english,female,78,usa
12 | 78,78,37.0,0.0,"boston, massachusetts, usa",english21,english,female,79,usa
13 | 79,79,25.0,0.0,"panama city, panama",english22,english,female,80,panama
14 | 84,84,37.0,0.0,"smith island, maryland, usa",english27,english,female,85,usa
15 | 86,86,54.0,0.0,"lindsay, ontario, canada",english29,english,female,87,canada
16 | 87,87,26.0,0.0,"brisbane, australia",english3,english,female,88,australia
17 | 88,88,30.0,0.0,"st. mary's, ontario, canada",english30,english,female,89,canada
18 | 89,89,31.0,0.0,"st. augustine, trinidad",english31,english,female,90,trinidad
19 | 90,90,50.0,0.0,"fresno, california, usa",english32,english,female,91,usa
20 | 92,92,18.0,0.0,"ronkonkoma, new york, usa",english34,english,female,93,usa
21 | 93,93,60.0,0.0,"milwaukee, wisconsin, usa",english35,english,female,94,usa
22 | 95,95,28.0,0.0,"yole, india",english37,english,female,96,india
23 | 97,97,59.0,0.0,"new orleans, louisiana, usa",english39,english,female,98,usa
24 | 98,98,53.0,0.0,"saint anne's bay, jamaica",english4,english,female,99,jamaica
25 | 103,103,63.0,0.0,"winston salem, north carolina, usa",english44,english,female,104,usa
26 | 106,106,76.0,0.0,"plantersville, arkansas, usa",english47,english,female,107,usa
27 | 110,110,29.0,0.0,"baltic, south dakota, usa",english50,english,female,111,usa
28 | 114,114,40.0,0.0,"new albany, indiana, usa",english54,english,female,115,usa
29 | 115,115,53.0,0.0,"st. louis, missouri, usa",english55,english,female,116,usa
30 | 118,118,46.0,0.0,"southhampton, uk",english58,english,female,119,uk
31 | 120,120,45.0,0.0,"brooklyn, new york, usa",english6,english,female,121,usa
32 | 125,125,25.0,0.0,"montreal, quebec, canada",english64,english,female,126,canada
33 | 138,138,18.0,0.0,"boston, massachusetts, usa",english76,english,female,139,usa
34 | 140,140,20.0,0.0,"caribou, maine, usa",english78,english,female,141,usa
35 | 142,142,21.0,0.0,"perth, australia",english8,english,female,143,australia
36 | 145,145,50.0,0.0,"chicago, illinois, usa",english82,english,female,146,usa
37 | 146,146,42.0,0.0,"winnfield, louisiana, usa",english83,english,female,147,usa
38 | 150,150,18.0,0.0,"tamworth, australia",english87,english,female,151,australia
39 | 153,153,48.0,0.0,"carthage, texas, usa",english9,english,female,154,usa
40 | 156,156,22.0,0.0,"wisconsin rapids, wisconsin, usa",english92,english,female,157,usa
41 | 157,157,22.0,0.0,"mamou, louisiana, usa",english93,english,female,158,usa
42 | 158,158,55.0,0.0,"pensacola, florida, usa",english94,english,female,159,usa
43 | 163,163,52.0,0.0,"pittsburgh, pennsylvania, usa",english99,english,female,164,usa
44 | 407,407,39.0,0.0,"los angeles, california, usa",english106,english,female,408,usa
45 | 419,419,26.0,0.0,"concord, new hampshire, usa",english109,english,female,420,usa
46 | 441,441,38.0,0.0,"birmingham 2, alabama, usa",english114,english,female,442,usa
47 | 444,444,60.0,0.0,"elmore, alabama, usa",english117,english,female,445,usa
48 | 454,454,32.0,0.0,"saskatoon, saskatchewan, canada",english119,english,female,455,canada
49 | 467,467,20.0,0.0,"kuala lumpur, malaysia",english122,english,female,468,malaysia
50 | 468,468,28.0,0.0,"riverside, california, usa",english123,english,female,469,usa
51 | 486,486,34.0,0.0,"cincinnati, ohio, usa",english126,english,female,487,usa
52 | 489,489,20.0,0.0,"miami, florida, usa",english128,english,female,490,usa
53 | 502,502,18.0,0.0,"toronto, ontario, canada",english132,english,female,503,canada
54 | 503,503,36.0,0.0,"norwich, new york, usa",english133,english,female,504,usa
55 | 505,505,22.0,0.0,"belfast, northern ireland, uk",english134,english,female,506,uk
56 | 509,509,25.0,0.0,"palmer, alaska, usa",english138,english,female,510,usa
57 | 522,522,40.0,0.0,"merced, california, usa",english147,english,female,523,usa
58 | 539,539,18.0,0.0,"washington, district of columbia, usa",english158,english,female,540,usa
59 | 541,541,21.0,0.0,"redwood falls, minnesota, usa",english160,english,female,542,usa
60 | 545,545,75.0,0.0,"wakefield, ohio, usa",english161,english,female,546,usa
61 | 547,547,41.0,0.0,"delaware, ohio, usa",english162,english,female,548,usa
62 | 549,549,43.0,0.0,"detroit, michigan, usa",english165,english,female,550,usa
63 | 555,555,50.0,0.0,"belmont, mississippi, usa",english170,english,female,556,usa
64 | 567,567,30.0,0.0,"kuching, sarawak, malaysia",english174,english,female,568,malaysia
65 | 572,572,18.0,0.0,"elizabeth city, north carolina, usa",english176,english,female,573,usa
66 | 573,573,23.0,0.0,"new orleans, louisiana, usa",english177,english,female,574,usa
67 | 594,594,55.0,0.0,"corriverton, guyana",english183,english,female,595,guyana
68 | 596,596,18.0,0.0,"hillsboro, oregon, usa",english184,english,female,597,usa
69 | 604,604,39.0,0.0,"gadsden, alabama, usa",english186,english,female,605,usa
70 | 605,605,42.0,0.0,"algona, iowa, usa",english187,english,female,606,usa
71 | 635,635,27.0,0.0,"east lansing, michigan, usa",english192,english,female,636,usa
72 | 638,638,21.0,0.0,"delavan, wisconsin, usa",english195,english,female,639,usa
73 | 653,653,50.0,0.0,"suva, fiji",english196,english,female,654,fiji
74 | 666,666,22.0,0.0,"berkeley, california, usa",english200,english,female,667,usa
75 | 667,667,26.0,0.0,"warren, michigan, usa",english201,english,female,668,usa
76 | 673,673,63.0,0.0,"boston, massachusetts, usa",english203,english,female,674,usa
77 | 678,678,45.0,0.0,"blue earth, minnesota, usa",english205,english,female,679,usa
78 | 679,679,35.0,0.0,"hucknall, nottinghamshire, england, uk",english206,english,female,680,uk
79 | 683,683,23.0,0.0,"wilkes-barre, pennsylvania, usa",english207,english,female,684,usa
80 | 701,701,28.0,0.0,"saint johns, antigua and barbuda",english209,english,female,702,antigua and barbuda
81 | 704,704,23.0,0.0,"trenton, nova scotia, canada",english210,english,female,705,canada
82 | 738,738,48.0,0.0,"chicago, illinois, usa",english216,english,female,739,usa
83 | 746,746,27.0,0.0,"abingdon, virginia, usa",english218,english,female,747,usa
84 | 747,747,38.0,0.0,"oakland, california, usa",english219,english,female,748,usa
85 | 748,748,18.0,0.0,"anaheim, california, usa",english220,english,female,749,usa
86 | 763,763,18.0,0.0,"auburn, indiana, usa",english222,english,female,764,usa
87 | 772,772,42.0,0.0,"camberley, surrey, uk",english227,english,female,773,uk
88 | 773,773,59.0,0.0,"glasgow, scotland, uk",english228,english,female,774,uk
89 | 792,792,26.0,0.0,"edmonton, alberta, canada",english233,english,female,793,canada
90 | 794,794,52.0,0.0,"richmond, virginia, usa",english235,english,female,795,usa
91 | 797,797,32.0,0.0,"metairie, louisiana, usa",english232,english,female,798,usa
92 | 800,800,22.0,0.0,"jeffersonville, ohio, usa",english237,english,female,801,usa
93 | 801,801,21.0,0.0,"vancouver, british columbia, canada",english238,english,female,802,canada
94 | 816,816,52.0,0.0,"wanganui, new zealand",english241,english,female,817,new zealand
95 | 817,817,77.0,0.0,"laurel, mississippi, usa",english242,english,female,818,usa
96 | 839,839,18.0,0.0,"philadelphia, pennsylvania, usa",english248,english,female,840,usa
97 | 849,849,29.0,0.0,"boston, massachusetts, usa",english249,english,female,850,usa
98 | 855,855,25.0,0.0,"san jose ca, california, usa",english252,english,female,856,usa
99 | 858,858,24.0,0.0,"oamaru, new zealand",english255,english,female,859,new zealand
100 | 870,870,18.0,0.0,"phoenix, arizona, usa",english260,english,female,871,usa
101 | 874,874,59.0,0.0,"forestville, maryland, usa",english261,english,female,875,usa
102 | 900,900,67.0,0.0,"leeds, uk",english268,english,female,901,uk
103 | 901,901,22.0,0.0,"adelaide, australia",english269,english,female,902,australia
104 | 903,903,32.0,0.0,"cape town, south africa",english271,english,female,904,south africa
105 | 908,908,25.0,0.0,"sydney, australia",english274,english,female,909,australia
106 | 911,911,20.0,0.0,"clifton, new jersey, usa",english275,english,female,912,usa
107 | 917,917,50.0,0.0,"chattanooga, tennessee, usa",english276,english,female,918,usa
108 | 918,918,48.0,0.0,"hudson, new york, usa",english277,english,female,919,usa
109 | 920,920,18.0,0.0,"augusta, georgia, usa",english278,english,female,921,usa
110 | 921,921,30.0,0.0,"singapore, singapore",english279,english,female,922,singapore
111 | 934,934,19.0,0.0,"ramsey, isle of man",english280,english,female,935,isle of man
112 | 976,976,21.0,0.0,"beaumont, texas, usa",english286,english,female,977,usa
113 | 980,980,27.0,0.0,"singapore, singapore",english287,english,female,981,singapore
114 | 981,981,26.0,0.0,"st. louis, missouri, usa",english288,english,female,982,usa
115 | 982,982,19.0,0.0,"fairfax, virginia, usa",english289,english,female,983,usa
116 | 1051,1051,22.0,0.0,"billings, montana, usa",english293,english,female,1052,usa
117 | 1059,1059,19.0,0.0,"naracoorte, australia",english294,english,female,1060,australia
118 | 1069,1069,18.0,0.0,"edmonton, alberta, canada",english296,english,female,1070,canada
119 | 1085,1085,45.0,0.0,"anaheim, california, usa",english303,english,female,1086,usa
120 | 1087,1087,20.0,0.0,"clare, ireland",english304,english,female,1088,ireland
121 | 1092,1092,24.0,0.0,"london, uk",english306,english,female,1093,uk
122 | 1093,1093,24.0,0.0,"la paz, bolivia",english305,english,female,1094,bolivia
123 | 1104,1104,44.0,0.0,"dunedin, new zealand",english309,english,female,1105,new zealand
124 | 1107,1107,37.0,0.0,"oxford, uk",english310,english,female,1108,uk
125 | 1109,1109,38.0,0.0,"detroit, michigan, usa",english311,english,female,1110,usa
126 | 1112,1112,40.0,0.0,"edmonton, alberta, canada",english312,english,female,1113,canada
127 | 1119,1119,26.0,0.0,"memphis, tennessee, usa",english314,english,female,1120,usa
128 | 1161,1161,59.0,0.0,"maryland county, liberia",english320,english,female,1162,liberia
129 | 1172,1172,31.0,0.0,"regina, saskatchewan, canada",english323,english,female,1173,canada
130 | 1205,1205,23.0,0.0,"hollywood, florida, usa",english328,english,female,1206,usa
131 | 1206,1206,21.0,0.0,"boise, idaho, usa",english329,english,female,1207,usa
132 | 1207,1207,18.0,0.0,"augusta, georgia, usa",english330,english,female,1208,usa
133 | 1208,1208,28.0,0.0,"perth, australia",english331,english,female,1209,australia
134 | 1211,1211,20.0,0.0,"baltimore, maryland, usa",english332,english,female,1212,usa
135 | 1213,1213,32.0,0.0,"spokane, washington, usa",english333,english,female,1214,usa
136 | 1214,1214,18.0,0.0,"derby, england, uk",english334,english,female,1215,uk
137 | 1216,1216,46.0,0.0,"baltimore, maryland, usa",english336,english,female,1217,usa
138 | 1217,1217,84.0,0.0,"jersey city, new jersey, usa",english337,english,female,1218,usa
139 | 1218,1218,53.0,0.0,"sydney, australia",english338,english,female,1219,australia
140 | 1223,1223,23.0,0.0,"new york, new york, usa",english341,english,female,1224,usa
141 | 1231,1231,19.0,0.0,"lumberton, north carolina, usa",english344,english,female,1232,usa
142 | 1235,1235,22.0,0.0,"burnsville, minnesota, usa",english347,english,female,1236,usa
143 | 1236,1236,20.0,0.0,"las pinas, philippines",english348,english,female,1237,philippines
144 | 1238,1238,24.0,0.0,"warrenton, virginia, usa",english349,english,female,1239,usa
145 | 1240,1240,18.0,0.0,"polesworth, staffordshire, uk",english350,english,female,1241,uk
146 | 1249,1249,23.0,0.0,"christchurch, new zealand",english354,english,female,1250,new zealand
147 | 1256,1256,23.0,0.0,"morden, manitoba, canada",english356,english,female,1257,canada
148 | 1276,1276,19.0,0.0,"erie, pennsylvania, usa",english360,english,female,1277,usa
149 | 1279,1279,20.0,0.0,"burnsville, minnesota, usa",english361,english,female,1280,usa
150 | 1287,1287,28.0,0.0,"sydney, australia",english364,english,female,1288,australia
151 | 1292,1292,25.0,0.0,"bournesmouth, uk",english366,english,female,1293,uk
152 | 1293,1293,25.0,0.0,"birkenhead, uk",english367,english,female,1294,uk
153 | 1302,1302,27.0,0.0,"colorado springs, colorado, usa",english371,english,female,1303,usa
154 | 1318,1318,32.0,0.0,"trenton, michigan, usa",english377,english,female,1319,usa
155 | 1319,1319,38.0,0.0,"silver spring, maryland, usa",english378,english,female,1320,usa
156 | 1322,1322,34.0,0.0,"roanoke, virginia, usa",english380,english,female,1323,usa
157 | 1325,1325,32.0,0.0,"washington, district of columbia, usa",english383,english,female,1326,usa
158 | 1327,1327,77.0,0.0,"mcveigh, kentucky, usa",english385,english,female,1328,usa
159 | 1329,1329,56.0,0.0,"los angeles, california, usa",english386,english,female,1330,usa
160 | 1330,1330,21.0,0.0,"lancaster, california, usa",english387,english,female,1331,usa
161 | 1331,1331,70.0,0.0,"bluefield, west virginia, usa",english388,english,female,1332,usa
162 | 1332,1332,71.0,0.0,"aldie, virginia, usa",english389,english,female,1333,usa
163 | 1334,1334,84.0,0.0,"milton, florida, usa",english391,english,female,1335,usa
164 | 1336,1336,58.0,0.0,"washington, district of columbia, usa",english393,english,female,1337,usa
165 | 1338,1338,82.0,0.0,"aiken, south carolina, usa",english394,english,female,1339,usa
166 | 1340,1340,48.0,0.0,"orange, virginia, usa",english396,english,female,1341,usa
167 | 1341,1341,76.0,0.0,"wadesboro, north carolina, usa",english397,english,female,1342,usa
168 | 1342,1342,83.0,0.0,"salisbury, north carolina, usa",english398,english,female,1343,usa
169 | 1344,1344,80.0,0.0,"la grange, georgia, usa",english399,english,female,1345,usa
170 | 1347,1347,59.0,0.0,"shanklin, isle of wight, uk",english400,english,female,1348,uk
171 | 1349,1349,26.0,0.0,"canberra, australia",english402,english,female,1350,australia
172 | 1350,1350,29.0,0.0,"sydney, australia",english403,english,female,1351,australia
173 | 1352,1352,32.0,0.0,"geelong, australia",english405,english,female,1353,australia
174 | 1357,1357,24.0,0.0,"pensacola, florida, usa",english406,english,female,1358,usa
175 | 1361,1361,60.0,0.0,"maitland, australia",english409,english,female,1362,australia
176 | 1370,1370,26.0,0.0,"washington, district of columbia, usa",english411,english,female,1371,usa
177 | 1371,1371,31.0,0.0,"washington, district of columbia, usa",english412,english,female,1372,usa
178 | 1372,1372,20.0,0.0,"manchester, england, uk",english413,english,female,1373,uk
179 | 1373,1373,30.0,0.0,"woonsocket, rhode island, usa",english414,english,female,1374,usa
180 | 1379,1379,20.0,0.0,"cardiff, wales, uk",english417,english,female,1380,uk
181 | 1394,1394,24.0,0.0,"baltimore, maryland, usa",english423,english,female,1395,usa
182 | 1410,1410,30.0,0.0,"florence, south carolina, usa",english426,english,female,1411,usa
183 | 1411,1411,68.0,0.0,"hanover, pennsylvania, usa",english427,english,female,1412,usa
184 | 1412,1412,55.0,0.0,"kingstree, south carolina, usa",english428,english,female,1413,usa
185 | 1415,1415,19.0,0.0,"philadelphia, pennsylvania, usa",english429,english,female,1416,usa
186 | 1418,1418,23.0,0.0,"melbourne, australia",english430,english,female,1419,australia
187 | 1422,1422,23.0,0.0,"green bay, wisconsin, usa",english431,english,female,1423,usa
188 | 1427,1427,22.0,0.0,"dublin, ireland",english432,english,female,1428,ireland
189 | 1429,1429,32.0,0.0,"gladwin, michigan, usa",english433,english,female,1430,usa
190 | 1470,1470,20.0,0.0,"palm springs, california, usa",english435,english,female,1471,usa
191 | 1474,1474,19.0,0.0,"freeport, the bahamas",english436,english,female,1475,the bahamas
192 | 1491,1491,20.0,0.0,"houston, texas, usa",english438,english,female,1492,usa
193 | 1492,1492,18.0,0.0,"richmond, british columbia, canada",english439,english,female,1493,canada
194 | 1501,1501,22.0,0.0,"glasgow, scotland, uk",english440,english,female,1502,uk
195 | 1523,1523,19.0,0.0,"oquawka, illinois, usa",english441,english,female,1524,usa
196 | 1532,1532,23.0,0.0,"moorhead, minnesota, usa",english446,english,female,1533,usa
197 | 1547,1547,21.0,0.0,"chicago, illinois, usa",english453,english,female,1548,usa
198 | 1549,1549,19.0,0.0,"raleigh, north carolina, usa",english454,english,female,1550,usa
199 | 1551,1551,29.0,0.0,"washington, dc, usa",english455,english,female,1552,usa
200 | 1553,1553,37.0,0.0,"lancashire, leyland, uk",english456,english,female,1554,uk
201 | 1558,1558,22.0,0.0,"reno, nevada, usa",english458,english,female,1559,usa
202 | 1560,1560,19.0,0.0,"rutland, england, uk",english460,english,female,1561,uk
203 | 1562,1562,44.0,0.0,"poughkeepsie, new york, usa",english461,english,female,1563,usa
204 | 1570,1570,53.0,0.0,"brooklyn, new york, usa",english463,english,female,1571,usa
205 | 1647,1647,26.0,0.0,"calgary, alberta, canada",english471,english,female,1648,canada
206 | 1655,1655,33.0,0.0,"rochester, minnesota, usa",english474,english,female,1656,usa
207 | 1656,1656,29.0,0.0,"ogden, utah, usa",english475,english,female,1657,usa
208 | 1657,1657,64.0,0.0,"manchester, new hampshire, usa",english476,english,female,1658,usa
209 | 1658,1658,31.0,0.0,"san diego, california, usa",english477,english,female,1659,usa
210 | 1663,1663,64.0,0.0,"new york, new york, usa",english479,english,female,1664,usa
211 | 1665,1665,88.0,0.0,"stringtown, oklahoma, usa",english481,english,female,1666,usa
212 | 1666,1666,74.0,0.0,"yeadon, pennsylvania, usa",english482,english,female,1667,usa
213 | 1682,1682,53.0,0.0,"corentyne, guyana",english486,english,female,1683,guyana
214 | 1687,1687,22.0,0.0,"london, uk",english487,english,female,1688,uk
215 | 1691,1691,35.0,0.0,"irvine, scotland, uk",english488,english,female,1692,uk
216 | 1693,1693,46.0,0.0,"kingston, jamaica",english489,english,female,1694,jamaica
217 | 1712,1712,79.0,0.0,"westmoreland, jamaica",english491,english,female,1713,jamaica
218 | 1715,1715,22.0,0.0,"omaha, nebraska, usa",english492,english,female,1716,usa
219 | 1727,1727,21.0,0.0,"st. paul, minnesota, usa",english499,english,female,1728,usa
220 | 1733,1733,64.0,0.0,"pittsburgh, pennsylvania, usa",english501,english,female,1734,usa
221 | 1734,1734,30.0,0.0,"farmington hills, michigan, usa",english502,english,female,1735,usa
222 | 1738,1738,29.0,0.0,"benin city, nigeria",english503,english,female,1739,nigeria
223 | 1797,1797,50.0,0.0,"beirut, lebanon",english505,english,female,1798,lebanon
224 | 1798,1798,30.0,0.0,"new orleans, louisiana, usa",english506,english,female,1799,usa
225 | 1800,1800,20.0,0.0,"winfield, illinois, usa",english508,english,female,1801,usa
226 | 1801,1801,44.0,0.0,"coudersport, pennsylvania, usa",english509,english,female,1802,usa
227 | 1802,1802,66.0,0.0,"philadelphia, pennsylvania, usa",english510,english,female,1803,usa
228 | 1817,1817,31.0,0.0,"montgomery, alabama, usa",english511,english,female,1818,usa
229 | 1818,1818,54.0,0.0,"salina, kansas, usa",english512,english,female,1819,usa
230 | 1855,1855,52.0,0.0,"bocas del toro, panama",english513,english,female,1856,panama
231 | 1872,1872,55.0,0.0,"youngstown, ohio, usa",english515,english,female,1873,usa
232 | 1874,1874,32.0,0.0,"dripping springs, texas, usa",english516,english,female,1875,usa
233 | 1881,1881,18.0,1.0,"karachi, pakistan",english519,english,female,1882,pakistan
234 | 1883,1883,47.0,0.0,"st. paul, minnesota, usa",english520,english,female,1884,usa
235 | 1885,1885,25.0,0.0,"bethesda, maryland, usa",english521,english,female,1886,usa
236 | 1886,1886,22.0,0.0,"bethpage, new york, usa",english522,english,female,1887,usa
237 | 1887,1887,18.0,0.0,"madison, wisconsin, usa",english523,english,female,1888,usa
238 | 1892,1892,18.0,0.0,"albion, new york, usa",english525,english,female,1893,usa
239 | 1906,1906,25.0,0.0,"wynnewood, pennsylvania, usa",english527,english,female,1907,usa
240 | 1907,1907,52.0,0.0,"green bay, wisconsin, usa",english528,english,female,1908,usa
241 | 1924,1924,29.0,0.0,"burlington, vermont, usa",english529,english,female,1925,usa
242 | 1925,1925,50.0,0.0,"detroit, michigan, usa",english530,english,female,1926,usa
243 | 1927,1927,29.0,0.0,"nokesville, virginia, usa",english531,english,female,1928,usa
244 | 1932,1932,23.0,0.0,"bristol, connecticut, usa",english532,english,female,1933,usa
245 | 1933,1933,38.0,0.0,"fairfax, virginia, usa",english533,english,female,1934,usa
246 | 1950,1950,29.0,0.0,"mineola, new york, usa",english534,english,female,1951,usa
247 | 1954,1954,22.0,0.0,"plymouth, massachusetts, usa",english535,english,female,1955,usa
248 | 1955,1955,21.0,0.0,"richmond, virginia, usa",english536,english,female,1956,usa
249 | 1956,1956,21.0,0.0,"woodbridge, virginia, usa",english537,english,female,1957,usa
250 | 1957,1957,20.0,0.0,"lyndhurst, virginia, usa",english538,english,female,1958,usa
251 | 1958,1958,19.0,0.0,"orange county, california, usa",english539,english,female,1959,usa
252 | 1959,1959,18.0,0.0,"arlington, virginia, usa",english540,english,female,1960,usa
253 | 1960,1960,27.0,0.0,"fort lauderdale, florida, usa",english541,english,female,1961,usa
254 | 1968,1968,39.0,0.0,"vancouver, british columbia, canada",english543,english,female,1969,canada
255 | 1995,1995,43.0,0.0,"tangier island, virginia, usa",english546,english,female,1996,usa
256 | 2011,2011,19.0,0.0,"exminister, uk",english549,english,female,2012,uk
257 | 2028,2028,81.0,0.0,"king george, virginia, usa",english551,english,female,2029,usa
258 | 2042,2042,23.0,0.0,"bethesda, maryland, usa",english552,english,female,2043,usa
259 | 2046,2046,22.0,0.0,"hoffman estates, illinois, usa",english554,english,female,2047,usa
260 | 2049,2049,84.0,0.0,"brooklyn, new york, usa",english556,english,female,2050,usa
261 | 2052,2052,25.0,2.0,"sharjah, united arab emirates",english557,english,female,2053,united arab emirates
262 | 2055,2055,19.0,0.0,"reston, virginia, usa",english559,english,female,2056,usa
263 | 2058,2058,22.0,0.0,"hartford, connecticut, usa",english560,english,female,2059,usa
264 | 2059,2059,20.0,0.0,"yorktown, virginia, usa",english561,english,female,2060,usa
265 | 2060,2060,30.0,0.0,"florence, south carolina, usa",english562,english,female,2061,usa
266 | 2079,2079,27.0,0.0,"portland, oregon, usa",english565,english,female,2080,usa
267 | 2102,2102,86.0,0.0,"quincy, florida, usa",english568,english,female,2103,usa
268 | 2105,2105,42.0,0.0,"parma, ohio, usa",english570,english,female,2106,usa
269 | 2161,2161,26.0,0.0,"toronto, ontario, canada",english574,english,female,2162,canada
270 | 2166,2166,63.0,0.0,"washington, dc, usa",english576,english,female,2167,usa
271 | 2169,2169,38.0,0.0,"san leandro, california, usa",english577,english,female,2170,usa
272 |
--------------------------------------------------------------------------------
/dataframes/df_english_male.csv:
--------------------------------------------------------------------------------
1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country
2 | 60,60,42.0,0.0,"pittsburgh, pennsylvania, usa",english1,english,male,61,usa
3 | 64,64,22.0,0.0,"torrington, connecticut, usa",english102,english,male,65,usa
4 | 65,65,21.0,0.0,"staten island, new york, usa",english103,english,male,66,usa
5 | 69,69,69.0,0.0,"henley on thames, oxfordshire, uk",english13,english,male,70,uk
6 | 70,70,40.0,0.0,"belfast, northern ireland, uk",english14,english,male,71,uk
7 | 72,72,21.0,0.0,"wise, virginia, usa",english16,english,male,73,usa
8 | 73,73,79.0,0.0,"darwin va, virginia, usa",english17,english,male,74,usa
9 | 75,75,53.0,0.0,"louisville, kentucky, usa",english19,english,male,76,usa
10 | 80,80,43.0,0.0,"englewood, tennessee, usa",english23,english,male,81,usa
11 | 81,81,37.0,0.0,"glasgow, scotland, uk",english24,english,male,82,uk
12 | 82,82,57.0,0.0,"atlanta, georgia, usa",english25,english,male,83,usa
13 | 83,83,71.0,0.0,"smith island, maryland, usa",english26,english,male,84,usa
14 | 85,85,54.0,0.0,"toronto, ontario, canada",english28,english,male,86,canada
15 | 91,91,21.0,0.0,"san diego, california, usa",english33,english,male,92,usa
16 | 94,94,60.0,0.0,"arcadia, wisconsin, usa",english36,english,male,95,usa
17 | 96,96,27.0,0.0,"littlehampton, uk",english38,english,male,97,uk
18 | 99,99,53.0,0.0,"dudley, uk",english40,english,male,100,uk
19 | 100,100,44.0,0.0,"wellington, new zealand",english41,english,male,101,new zealand
20 | 101,101,65.0,0.0,"nigel, south africa",english42,english,male,102,south africa
21 | 102,102,47.0,0.0,"vancouver, british columbia, canada",english43,english,male,103,canada
22 | 104,104,52.0,0.0,"pine bluff, arkansas, usa",english45,english,male,105,usa
23 | 105,105,73.0,0.0,"creswell, north carolina, usa",english46,english,male,106,usa
24 | 107,107,26.0,0.0,"port moresby, national capital district, papua new guinea",english48,english,male,108,papua new guinea
25 | 108,108,43.0,0.0,"castro valley, california, usa",english49,english,male,109,usa
26 | 109,109,62.0,0.0,"fairfax, virginia, usa",english5,english,male,110,usa
27 | 111,111,67.0,0.0,"detroit, michigan, usa",english51,english,male,112,usa
28 | 112,112,20.0,0.0,"calgary, alberta, canada",english52,english,male,113,canada
29 | 113,113,63.0,0.0,"syracuse, new york, usa",english53,english,male,114,usa
30 | 116,116,20.0,0.0,"londonderry, northern ireland, uk",english56,english,male,117,uk
31 | 117,117,34.0,0.0,"birmingham 1, uk",english57,english,male,118,uk
32 | 119,119,30.0,0.0,"columbus, ohio, usa",english59,english,male,120,usa
33 | 121,121,18.0,0.0,"chesapeake, virginia, usa",english60,english,male,122,usa
34 | 122,122,19.0,0.0,"toronto, ontario, canada",english61,english,male,123,canada
35 | 123,123,30.0,0.0,"west jordan, utah, usa",english62,english,male,124,usa
36 | 124,124,21.0,0.0,"lewisville, texas, usa",english63,english,male,125,usa
37 | 126,126,47.0,0.0,"lewistown, pennsylvania, usa",english65,english,male,127,usa
38 | 127,127,19.0,0.0,"baltimore, maryland, usa",english66,english,male,128,usa
39 | 128,128,37.0,0.0,"princeton, indiana, usa",english67,english,male,129,usa
40 | 129,129,52.0,0.0,"new york, new york, usa",english68,english,male,130,usa
41 | 130,130,23.0,0.0,"adelaide, australia",english69,english,male,131,australia
42 | 131,131,52.0,0.0,"macon, mississippi, usa",english7,english,male,132,usa
43 | 132,132,21.0,0.0,"beaumont, texas, usa",english70,english,male,133,usa
44 | 133,133,20.0,0.0,"west covina, california, usa",english71,english,male,134,usa
45 | 134,134,31.0,0.0,"kilkenny, ireland",english72,english,male,135,ireland
46 | 135,135,24.0,0.0,"darwin, australia",english73,english,male,136,australia
47 | 136,136,21.0,0.0,"wichita, kansas, usa",english74,english,male,137,usa
48 | 137,137,32.0,0.0,"idaho falls, idaho, usa",english75,english,male,138,usa
49 | 139,139,26.0,0.0,"melbourne, australia",english77,english,male,140,australia
50 | 141,141,73.0,0.0,"boston, massachusetts, usa",english79,english,male,142,usa
51 | 143,143,34.0,0.0,"glasgow, scotland, uk",english80,english,male,144,uk
52 | 144,144,30.0,0.0,"st. paul, minnesota, usa",english81,english,male,145,usa
53 | 147,147,45.0,0.0,"sydney, australia",english84,english,male,148,australia
54 | 148,148,31.0,0.0,"strabane, northern ireland, uk",english85,english,male,149,uk
55 | 149,149,50.0,0.0,"charleston, south carolina, usa",english86,english,male,150,usa
56 | 151,151,18.0,0.0,"grand rapids, michigan, usa",english88,english,male,152,usa
57 | 152,152,18.0,0.0,"kingston ma, massachusetts, usa",english89,english,male,153,usa
58 | 154,154,53.0,0.0,"pittsburgh, pennsylvania, usa",english90,english,male,155,usa
59 | 155,155,23.0,0.0,"kingston, jamaica",english91,english,male,156,jamaica
60 | 159,159,18.0,0.0,"mishawaka, indiana, usa",english95,english,male,160,usa
61 | 160,160,31.0,0.0,"point pleasant, new jersey, usa",english96,english,male,161,usa
62 | 161,161,42.0,0.0,"wilmington nc, north carolina, usa",english97,english,male,162,usa
63 | 162,162,22.0,0.0,"spokane, washington, usa",english98,english,male,163,usa
64 | 406,406,57.0,0.0,"windsor, ontario, canada",english105,english,male,407,canada
65 | 415,415,19.0,0.0,"chicago, illinois, usa",english107,english,male,416,usa
66 | 418,418,21.0,0.0,"strabane, northern ireland, uk",english108,english,male,419,uk
67 | 420,420,21.0,0.0,"great yarmouth, norfolk, uk",english110,english,male,421,uk
68 | 425,425,21.0,0.0,"bombay, india",english111,english,male,426,india
69 | 426,426,21.0,0.0,"st. thomas, us virgin islands",english112,english,male,427,us virgin islands
70 | 438,438,31.0,0.0,"wrexham, wales, uk",english113,english,male,439,uk
71 | 442,442,31.0,0.0,"st. albans, hertfordshire, uk",english115,english,male,443,uk
72 | 443,443,56.0,0.0,"atlanta, georgia, usa",english116,english,male,444,usa
73 | 445,445,19.0,0.0,"akron, ohio, usa",english118,english,male,446,usa
74 | 455,455,65.0,0.0,"saskatoon, saskatchewan, canada",english120,english,male,456,canada
75 | 464,464,21.0,0.0,"waconia, minnesota, usa",english121,english,male,465,usa
76 | 479,479,38.0,0.0,"brooklyn, new york, usa",english124,english,male,480,usa
77 | 484,484,28.0,0.0,"st. george, queensland, australia",english125,english,male,485,australia
78 | 488,488,39.0,0.0,"new britain, connecticut, usa",english127,english,male,489,usa
79 | 491,491,25.0,0.0,"blackpool, lancashire, uk",english129,english,male,492,uk
80 | 495,495,32.0,0.0,"port elizabeth, south africa",english130,english,male,496,south africa
81 | 496,496,23.0,0.0,"west palm beach, florida, usa",english131,english,male,497,usa
82 | 506,506,22.0,0.0,"orange beach, alabama, usa",english135,english,male,507,usa
83 | 507,507,25.0,0.0,"calgary, alberta, canada",english136,english,male,508,canada
84 | 508,508,26.0,0.0,"charleston, west virginia, usa",english137,english,male,509,usa
85 | 510,510,20.0,0.0,"providence, rhode island, usa",english139,english,male,511,usa
86 | 514,514,43.0,0.0,"quezon city, philippines",english140,english,male,515,philippines
87 | 515,515,19.0,0.0,"york, uk",english141,english,male,516,uk
88 | 516,516,19.0,0.0,"avon, new york, usa",english142,english,male,517,usa
89 | 517,517,42.0,0.0,"minneapolis, minnesota, usa",english143,english,male,518,usa
90 | 518,518,24.0,0.0,"barneys river, nova scotia, canada",english144,english,male,519,canada
91 | 520,520,18.0,0.0,"bury st. edmunds, uk",english145,english,male,521,uk
92 | 521,521,22.0,0.0,"blytheville, arkansas, usa",english146,english,male,522,usa
93 | 524,524,31.0,0.0,"sydney, australia",english148,english,male,525,australia
94 | 525,525,18.0,0.0,"lawrenceville, georgia, usa",english149,english,male,526,usa
95 | 526,526,31.0,0.0,"brownsville, kentucky, usa",english150,english,male,527,usa
96 | 527,527,18.0,0.0,"baltimore, maryland, usa",english151,english,male,528,usa
97 | 528,528,18.0,0.0,"adelaide, australia",english152,english,male,529,australia
98 | 532,532,35.0,0.0,"brisbane, australia",english153,english,male,533,australia
99 | 533,533,28.0,0.0,"glace bay, nova scotia, canada",english154,english,male,534,canada
100 | 534,534,35.0,0.0,"oakland, california, usa",english155,english,male,535,usa
101 | 535,535,33.0,0.0,"dublin, ireland",english156,english,male,536,ireland
102 | 537,537,35.0,0.0,"fort worth, texas, usa",english157,english,male,538,usa
103 | 540,540,18.0,0.0,"ottawa, ontario, canada",english159,english,male,541,canada
104 | 546,546,41.0,0.0,"fairview park, ohio, usa",english163,english,male,547,usa
105 | 548,548,38.0,0.0,"leeds, uk",english164,english,male,549,uk
106 | 550,550,18.0,0.0,"glenside, pennsylvania, usa",english166,english,male,551,usa
107 | 551,551,43.0,0.0,"lakeview, michigan, usa",english167,english,male,552,usa
108 | 553,553,40.0,0.0,"oceanside, california, usa",english168,english,male,554,usa
109 | 554,554,18.0,0.0,"cleveland, mississippi, usa",english169,english,male,555,usa
110 | 562,562,21.0,0.0,"st. louis, missouri, usa",english171,english,male,563,usa
111 | 563,563,49.0,0.0,"kingston, jamaica",english172,english,male,564,jamaica
112 | 566,566,19.0,0.0,"libertyville, illinois, usa",english173,english,male,567,usa
113 | 571,571,24.0,0.0,"seattle, washington, usa",english175,english,male,572,usa
114 | 582,582,30.0,0.0,"wilkes-barre, pennsylvania, usa",english178,english,male,583,usa
115 | 583,583,57.0,0.0,"huron, south dakota, usa",english179,english,male,584,usa
116 | 584,584,20.0,0.0,"newport, rhode island, usa",english180,english,male,585,usa
117 | 585,585,68.0,0.0,"new eagle, pennsylvania, usa",english181,english,male,586,usa
118 | 589,589,50.0,0.0,"troy, new york, usa",english182,english,male,590,usa
119 | 603,603,38.0,0.0,"oxford, uk",english185,english,male,604,uk
120 | 610,610,35.0,0.0,"edinburgh, scotland, uk",english188,english,male,611,uk
121 | 616,616,19.0,0.0,"grand forks, north dakota, usa",english189,english,male,617,usa
122 | 617,617,27.0,0.0,"oak park, illinois, usa",english190,english,male,618,usa
123 | 630,630,19.0,0.0,"las cruces, new mexico, usa",english191,english,male,631,usa
124 | 636,636,32.0,0.0,"honolulu, hawaii, usa",english193,english,male,637,usa
125 | 637,637,19.0,0.0,"manchester, england, uk",english194,english,male,638,uk
126 | 661,661,22.0,0.0,"iowa city, iowa, usa",english197,english,male,662,usa
127 | 662,662,56.0,0.0,"washington, district of columbia, usa",english198,english,male,663,usa
128 | 663,663,25.0,0.0,"bay shore, new york, usa",english199,english,male,664,usa
129 | 672,672,19.0,0.0,"crisfield, maryland, usa",english202,english,male,673,usa
130 | 677,677,53.0,0.0,"chicago, illinois, usa",english204,english,male,678,usa
131 | 694,694,46.0,0.0,"washington, dc, usa",english208,english,male,695,usa
132 | 719,719,30.0,0.0,"worcester, massachusetts, usa",english211,english,male,720,usa
133 | 728,728,24.0,0.0,"spartanburg, south carolina, usa",english212,english,male,729,usa
134 | 733,733,38.0,0.0,"dodge city, kansas, usa",english213,english,male,734,usa
135 | 736,736,26.0,0.0,"myrtle beach, south carolina, usa",english214,english,male,737,usa
136 | 737,737,20.0,0.0,"lemington spa, warwickshire, uk",english215,english,male,738,uk
137 | 739,739,66.0,0.0,"harrow, middlesex, uk",english217,english,male,740,uk
138 | 753,753,45.0,0.0,"london, uk",english221,english,male,754,uk
139 | 766,766,18.0,0.0,"erie, pennsylvania, usa",english223,english,male,767,usa
140 | 767,767,20.0,0.0,"bridgetown, barbados",english224,english,male,768,barbados
141 | 770,770,50.0,0.0,"dundee, scotland, uk",english225,english,male,771,uk
142 | 771,771,38.0,0.0,"walton-on-thames, surrey, uk",english226,english,male,772,uk
143 | 774,774,25.0,0.0,"burlington, vermont, usa",english229,english,male,775,usa
144 | 775,775,35.0,0.0,"launceston, tasmania, australia",english230,english,male,776,australia
145 | 790,790,22.0,0.0,"tampa, florida, usa",english231,english,male,791,usa
146 | 793,793,49.0,0.0,"bloomington, indiana, usa",english234,english,male,794,usa
147 | 798,798,44.0,0.0,"st. charles, illinois, usa",english236,english,male,799,usa
148 | 802,802,19.0,0.0,"fort kobbe, panama",english239,english,male,803,panama
149 | 814,814,26.0,0.0,"guelph, ontario, canada",english240,english,male,815,canada
150 | 819,819,27.0,0.0,"fredericton, new brunswick, canada",english243,english,male,820,canada
151 | 820,820,23.0,0.0,"san francisco, california, usa",english244,english,male,821,usa
152 | 823,823,25.0,0.0,"pittsburgh, pennsylvania, usa",english245,english,male,824,usa
153 | 824,824,32.0,0.0,"los angeles, california, usa",english246,english,male,825,usa
154 | 834,834,33.0,0.0,"prince rupert, british columbia, canada",english247,english,male,835,canada
155 | 851,851,19.0,0.0,"chester, england, uk",english250,english,male,852,uk
156 | 854,854,21.0,0.0,"mt. kisco, new york, usa",english251,english,male,855,usa
157 | 856,856,21.0,0.0,"dublin, ireland",english253,english,male,857,ireland
158 | 857,857,39.0,0.0,"eugene, oregon, usa",english254,english,male,858,usa
159 | 860,860,23.0,0.0,"laurinburg, north carolina, usa",english256,english,male,861,usa
160 | 862,862,24.0,0.0,"san diego, california, usa",english257,english,male,863,usa
161 | 868,868,24.0,0.0,"kilkenny, ireland",english258,english,male,869,ireland
162 | 869,869,23.0,0.0,"singapore, singapore",english259,english,male,870,singapore
163 | 875,875,60.0,0.0,"naylor, maryland, usa",english262,english,male,876,usa
164 | 883,883,21.0,0.0,"oak forest, illinois, usa",english263,english,male,884,usa
165 | 888,888,21.0,0.0,"freemont, california, usa",english264,english,male,889,usa
166 | 889,889,31.0,0.0,"vancouver wa, washington, usa",english265,english,male,890,usa
167 | 898,898,18.0,0.0,"san diego, california, usa",english266,english,male,899,usa
168 | 899,899,66.0,0.0,"leeds, uk",english267,english,male,900,uk
169 | 902,902,29.0,0.0,"cape town, south africa",english270,english,male,903,south africa
170 | 904,904,20.0,0.0,"dallas, texas, usa",english272,english,male,905,usa
171 | 906,906,46.0,0.0,"blue bell, pennsylvania, usa",english273,english,male,907,usa
172 | 936,936,23.0,0.0,"ramsey mn, minnesota, usa",english281,english,male,937,usa
173 | 938,938,20.0,0.0,"tampa, florida, usa",english282,english,male,939,usa
174 | 950,950,18.0,0.0,"pelham, new york, usa",english283,english,male,951,usa
175 | 951,951,33.0,0.0,"gainesville, florida, usa",english284,english,male,952,usa
176 | 960,960,50.0,0.0,"melbourne, australia",english285,english,male,961,australia
177 | 985,985,20.0,0.0,"duluth, georgia, usa",english290,english,male,986,usa
178 | 1025,1025,18.0,0.0,"belize city, belize",english291,english,male,1026,belize
179 | 1050,1050,38.0,0.0,"kansas city, missouri, usa",english292,english,male,1051,usa
180 | 1066,1066,25.0,0.0,"airdrie, alberta, canada",english295,english,male,1067,canada
181 | 1070,1070,19.0,0.0,"lindenhurst, new york, usa",english297,english,male,1071,usa
182 | 1071,1071,22.0,0.0,"lismore, new south wales, australia",english298,english,male,1072,australia
183 | 1074,1074,59.0,0.0,"windsor, ontario, canada",english299,english,male,1075,canada
184 | 1075,1075,18.0,0.0,"dublin, ireland",english300,english,male,1076,ireland
185 | 1077,1077,41.0,0.0,"toronto, ontario, canada",english301,english,male,1078,canada
186 | 1084,1084,22.0,0.0,"melbourne, australia",english302,english,male,1085,australia
187 | 1099,1099,48.0,0.0,"alexandria, va, usa",english307,english,male,1100,usa
188 | 1100,1100,34.0,0.0,"cardiff, wales, uk",english308,english,male,1101,uk
189 | 1118,1118,28.0,0.0,"silver spring, maryland, usa",english313,english,male,1119,usa
190 | 1120,1120,25.0,0.0,"los angeles, california, usa",english315,english,male,1121,usa
191 | 1121,1121,33.0,0.0,"sacramento, california, usa",english316,english,male,1122,usa
192 | 1131,1131,26.0,0.0,"burlington, vermont, usa",english317,english,male,1132,usa
193 | 1136,1136,46.0,0.0,"sarnia, ontario, canada",english318,english,male,1137,canada
194 | 1137,1137,29.0,0.0,"grimsby, lincolnshire, uk",english319,english,male,1138,uk
195 | 1162,1162,52.0,0.0,"brooklyn, new york, usa",english321,english,male,1163,usa
196 | 1171,1171,29.0,0.0,"yorkton, saskatchewan, canada",english322,english,male,1172,canada
197 | 1173,1173,28.0,0.0,"st. marys, ontario, canada",english324,english,male,1174,canada
198 | 1175,1175,32.0,0.0,"reading, pennsylvania, usa",english325,english,male,1176,usa
199 | 1187,1187,53.0,0.0,"fairborn, ohio, usa",english326,english,male,1188,usa
200 | 1204,1204,27.0,0.0,"dunedin, florida, usa",english327,english,male,1205,usa
201 | 1215,1215,6.0,0.0,"washington, district of columbia, usa",english335,english,male,1216,usa
202 | 1219,1219,54.0,0.0,"chicago, illinois, usa",english339,english,male,1220,usa
203 | 1221,1221,19.0,0.0,"charleston, west virginia, usa",english340,english,male,1222,usa
204 | 1224,1224,33.0,0.0,"kansas city, missouri, usa",english342,english,male,1225,usa
205 | 1225,1225,23.0,0.0,"portland, maine, usa",english343,english,male,1226,usa
206 | 1232,1232,20.0,0.0,"kansas city, missouri, usa",english345,english,male,1233,usa
207 | 1234,1234,60.0,0.0,"east hartford, connecticut, usa",english346,english,male,1235,usa
208 | 1241,1241,37.0,0.0,"carthage, missouri, usa",english351,english,male,1242,usa
209 | 1242,1242,25.0,0.0,"singapore, singapore",english352,english,male,1243,singapore
210 | 1245,1245,18.0,0.0,"saskatoon, saskatchewan, canada",english353,english,male,1246,canada
211 | 1255,1255,19.0,0.0,"los angeles, california, usa",english355,english,male,1256,usa
212 | 1257,1257,24.0,0.0,"alto, georgia, usa",english357,english,male,1258,usa
213 | 1258,1258,19.0,0.0,"fredericton, new brunswick, canada",english358,english,male,1259,canada
214 | 1275,1275,32.0,0.0,"sackville, new brunswick, canada",english359,english,male,1276,canada
215 | 1280,1280,21.0,0.0,"cork, ireland",english362,english,male,1281,ireland
216 | 1286,1286,20.0,0.0,"castle donington, uk",english363,english,male,1287,uk
217 | 1291,1291,52.0,0.0,"oxford, uk",english365,english,male,1292,uk
218 | 1294,1294,43.0,0.0,"strattford-on-avon, uk",english368,english,male,1295,uk
219 | 1296,1296,27.0,0.0,"mcminnville, oregon, usa",english369,english,male,1297,usa
220 | 1301,1301,22.0,0.0,"pondicherry, india",english370,english,male,1302,india
221 | 1306,1306,80.0,0.0,"christiansburg, virginia, usa",english372,english,male,1307,usa
222 | 1307,1307,22.0,0.0,"russellville, kentucky, usa",english373,english,male,1308,usa
223 | 1309,1309,36.0,0.0,"milford haven, wales, uk",english374,english,male,1310,uk
224 | 1311,1311,28.0,0.0,"manchester, connecticut, usa",english375,english,male,1312,usa
225 | 1314,1314,46.0,0.0,"pasadena, california, usa",english376,english,male,1315,usa
226 | 1320,1320,43.0,0.0,"summit, new jersey, usa",english379,english,male,1321,usa
227 | 1323,1323,85.0,0.0,"pike county, kentucky, usa",english381,english,male,1324,usa
228 | 1324,1324,34.0,0.0,"arlington, virginia, usa",english382,english,male,1325,usa
229 | 1326,1326,57.0,0.0,"williamson, west virginia, usa",english384,english,male,1327,usa
230 | 1333,1333,74.0,0.0,"milwaukee, wisconsin, usa",english390,english,male,1334,usa
231 | 1335,1335,62.0,0.0,"warrenton, virginia, usa",english392,english,male,1336,usa
232 | 1339,1339,39.0,0.0,"alexandria, virginia, usa",english395,english,male,1340,usa
233 | 1348,1348,26.0,0.0,"lucerne, switzerland",english401,english,male,1349,switzerland
234 | 1351,1351,29.0,0.0,"windsor, australia",english404,english,male,1352,australia
235 | 1359,1359,47.0,0.0,"windsor, ontario, canada",english407,english,male,1360,canada
236 | 1360,1360,64.0,0.0,"lynwood, california, usa",english408,english,male,1361,usa
237 | 1362,1362,35.0,0.0,"hamilton, new zealand",english410,english,male,1363,new zealand
238 | 1374,1374,32.0,0.0,"woonsocket, rhode island, usa",english415,english,male,1375,usa
239 | 1376,1376,34.0,0.0,"sydney, australia",english416,english,male,1377,australia
240 | 1383,1383,29.0,0.0,"arlington, virginia",english418,english,male,1384,virginia
241 | 1384,1384,27.0,0.0,"youngstown, ohio, usa",english419,english,male,1385,usa
242 | 1386,1386,21.0,0.0,"auckland, new zealand",english420,english,male,1387,new zealand
243 | 1387,1387,38.0,0.0,"landstuhl, germany",english421,english,male,1388,germany
244 | 1389,1389,31.0,0.0,"rochester, new york, usa",english422,english,male,1390,usa
245 | 1408,1408,75.0,0.0,"danville, virginia, usa",english424,english,male,1409,usa
246 | 1409,1409,31.0,0.0,"danville, virginia, usa",english425,english,male,1410,usa
247 | 1468,1468,24.0,0.0,"kansas city, kansas, usa",english434,english,male,1469,usa
248 | 1476,1476,21.0,0.0,"cleveland, ohio, usa",english437,english,male,1477,usa
249 | 1527,1527,23.0,0.0,"salisbury mills, new york, usa",english442,english,male,1528,usa
250 | 1529,1529,31.0,0.0,"fort collins, colorado, usa",english443,english,male,1530,usa
251 | 1530,1530,24.0,0.0,"anniston, alabama, usa",english444,english,male,1531,usa
252 | 1531,1531,60.0,0.0,"san francisco, california, usa",english445,english,male,1532,usa
253 | 1533,1533,21.0,0.0,"hartford, connecticut, usa",english447,english,male,1534,usa
254 | 1535,1535,20.0,0.0,"isle of arran, scotland, uk",english448,english,male,1536,uk
255 | 1537,1537,27.0,0.0,"barton, vermont, usa",english449,english,male,1538,usa
256 | 1543,1543,26.0,0.0,"windsor, uk",english450,english,male,1544,uk
257 | 1545,1545,44.0,0.0,"hazlehurst, georgia, usa",english451,english,male,1546,usa
258 | 1546,1546,24.0,0.0,"newcastle, australia",english452,english,male,1547,australia
259 | 1554,1554,43.0,0.0,"syracuse, new york, usa",english457,english,male,1555,usa
260 | 1559,1559,19.0,0.0,"charlotte, north carolina, usa",english459,english,male,1560,usa
261 | 1563,1563,20.0,0.0,"paducah, kentucky, usa",english462,english,male,1564,usa
262 | 1617,1617,27.0,0.0,"ottawa, ontario, canada",english464,english,male,1618,canada
263 | 1634,1634,20.0,0.0,"aylesbury, buckinghamshire, uk",english465,english,male,1635,uk
264 | 1636,1636,18.0,0.0,"portland, maine, usa",english466,english,male,1637,usa
265 | 1638,1638,23.0,1.0,"pozzuoli, italy",english467,english,male,1639,italy
266 | 1639,1639,30.0,0.0,"myrtle beach, south carolina, usa",english468,english,male,1640,usa
267 | 1640,1640,22.0,0.0,"knoxville, tennessee, usa",english469,english,male,1641,usa
268 | 1641,1641,21.0,0.0,"leighton buzzard, uk",english470,english,male,1642,uk
269 | 1648,1648,26.0,0.0,"rota, spain",english472,english,male,1649,spain
270 | 1649,1649,20.0,0.0,"nottingham, uk",english473,english,male,1650,uk
271 | 1660,1660,19.0,0.0,"sydney, nova scotia, canada",english478,english,male,1661,canada
272 | 1664,1664,58.0,0.0,"winston-salem, north carolina, usa",english480,english,male,1665,usa
273 | 1667,1667,19.0,0.0,"boston, massachusetts, usa",english483,english,male,1668,usa
274 | 1671,1671,24.0,0.0,"dublin, ireland",english484,english,male,1672,ireland
275 | 1673,1673,20.0,0.0,"cromwell, connecticut, usa",english485,english,male,1674,usa
276 | 1708,1708,18.0,0.0,"auckland, new zealand",english490,english,male,1709,new zealand
277 | 1717,1717,20.0,0.0,"miami, florida, usa",english493,english,male,1718,usa
278 | 1718,1718,27.5,0.0,"findlay, ohio, usa",english494,english,male,1719,usa
279 | 1719,1719,21.0,0.0,"kirkland, washington, usa",english495,english,male,1720,usa
280 | 1723,1723,20.0,0.0,"london, england, uk",english496,english,male,1724,uk
281 | 1725,1725,22.0,0.0,"cleveland, ohio, usa",english497,english,male,1726,usa
282 | 1726,1726,27.0,0.0,"prince george, british columbia, canada",english498,english,male,1727,canada
283 | 1732,1732,38.0,0.0,"albury, new south wales, australia",english500,english,male,1733,australia
284 | 1785,1785,25.0,0.0,"west palm beach, florida, usa",english504,english,male,1786,usa
285 | 1799,1799,21.0,0.0,"accra, ghana",english507,english,male,1800,ghana
286 | 1871,1871,57.0,0.0,"youngstown, ohio, usa",english514,english,male,1872,usa
287 | 1875,1875,33.0,0.0,"colorado springs, colorado, usa",english517,english,male,1876,usa
288 | 1877,1877,24.0,0.0,"kells, meath, ireland",english518,english,male,1878,ireland
289 | 1889,1889,19.0,0.0,"downers grove, illinois, usa",english524,english,male,1890,usa
290 | 1894,1894,20.0,0.0,"woodbridge, virginia, usa",english526,english,male,1895,usa
291 | 1961,1961,60.0,0.0,"toronto, ontario, canada",english542,english,male,1962,canada
292 | 1969,1969,31.0,0.0,"hanover, new hampshire, usa",english544,english,male,1970,usa
293 | 1994,1994,56.0,0.0,"tangier island, virginia, usa",english545,english,male,1995,usa
294 | 1996,1996,63.0,0.0,"tangier island, virginia, usa",english547,english,male,1997,usa
295 | 2002,2002,61.0,0.0,"dublin, ireland",english548,english,male,2003,ireland
296 | 2027,2027,82.0,0.0,"fredericksburg, virginia, usa",english550,english,male,2028,usa
297 | 2045,2045,21.0,0.0,"nashville, tennessee, usa",english553,english,male,2046,usa
298 | 2048,2048,90.0,0.0,"brooklyn, new york, usa",english555,english,male,2049,usa
299 | 2054,2054,23.0,0.0,"queens, new york, usa",english558,english,male,2055,usa
300 | 2073,2073,32.0,0.0,"manassas, virginia, usa",english563,english,male,2074,usa
301 | 2076,2076,52.0,0.0,"casper, wyoming, usa",english564,english,male,2077,usa
302 | 2080,2080,22.0,0.0,"derby, england, uk",english566,english,male,2081,uk
303 | 2100,2100,24.0,0.0,"new york, new york, usa",english567,english,male,2101,usa
304 | 2103,2103,24.0,0.0,"sydney, australia",english569,english,male,2104,australia
305 | 2112,2112,39.0,0.0,"washington, dc, usa",english571,english,male,2113,usa
306 | 2120,2120,51.0,0.0,"fort worth, texas, usa",english572,english,male,2121,usa
307 | 2123,2123,46.0,0.0,"painesville, ohio, usa",english573,english,male,2124,usa
308 | 2164,2164,24.0,0.0,"great falls, virginia, usa",english575,english,male,2165,usa
309 | 2170,2170,60.0,0.0,"salford, lancashire, uk",english578,english,male,2171,uk
310 | 2171,2171,22.0,0.0,"adelaide, australia",english579,english,male,2172,australia
311 |
--------------------------------------------------------------------------------